[PATCH v3 08/10] drm/amd: Use `pm_suspend_default_s2idle`

2022-06-30 Thread Mario Limonciello
Rather than examining the suspend target, examine what the system is
configured to use.  This should be no functional change; it just improves
readability by using the helper instead.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 2146232c62ef..fc2c6e311979 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1056,7 +1056,7 @@ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device 
*adev)
 {
if (adev->flags & AMD_IS_APU)
return false;
-   return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
+   return !pm_suspend_default_s2idle();
 }
 
 /**
@@ -1069,7 +1069,7 @@ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device 
*adev)
 bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
 {
if (!(adev->flags & AMD_IS_APU) ||
-   (pm_suspend_target_state != PM_SUSPEND_TO_IDLE))
+   !pm_suspend_default_s2idle())
return false;
 
if (!pm_suspend_preferred_s2idle()) {
-- 
2.34.1



[PATCH v3 07/10] drm/amd: Use `pm_suspend_preferred_s2idle`

2022-06-30 Thread Mario Limonciello
Drop the direct check from the FADT and use the helper instead.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 98ac53ee6bb5..2146232c62ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1072,7 +1072,7 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device 
*adev)
(pm_suspend_target_state != PM_SUSPEND_TO_IDLE))
return false;
 
-   if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) {
+   if (!pm_suspend_preferred_s2idle()) {
dev_warn_once(adev->dev,
  "Power consumption will be higher as BIOS has not 
been configured for suspend-to-idle.\n"
  "To use suspend-to-idle change the sleep mode in 
BIOS setup.\n");
-- 
2.34.1



[PATCH] drm/amd/display: Remove unused variables from vba_vars_st

2022-06-30 Thread Maíra Canal
Some variables from the struct vba_vars_st are not referenced in any
other place on the codebase. As they are not used, this commit removes
those variables.

Signed-off-by: Maíra Canal 
---

Compilers do not warn about unused struct members, so they are a bit
harder to find. In order to find these unused variables, I used git grep and
checked if they were used anywhere else.

Any feedback or suggestions (maybe a tool to check for unused struct members)
are welcome!

Best Regards,
- Maíra Canal

---
 .../drm/amd/display/dc/dml/display_mode_vba.c |  1 -
 .../drm/amd/display/dc/dml/display_mode_vba.h | 33 ---
 2 files changed, 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index ed23c7c79d86..6b3918609d26 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -346,7 +346,6 @@ static void fetch_socbb_params(struct display_mode_lib 
*mode_lib)
mode_lib->vba.DRAMClockChangeRequirementFinal = 1;
mode_lib->vba.FCLKChangeRequirementFinal = 1;
mode_lib->vba.USRRetrainingRequiredFinal = 1;
-   mode_lib->vba.ConfigurableDETSizeEnFinal = 0;
mode_lib->vba.AllowForPStateChangeOrStutterInVBlankFinal = 
soc->allow_for_pstate_or_stutter_in_vblank_final;
mode_lib->vba.DRAMClockChangeLatency = 
soc->dram_clock_change_latency_us;
mode_lib->vba.DummyPStateCheck = soc->dram_clock_change_latency_us == 
soc->dummy_pstate_latency_us;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 25a9a606ab6f..e95b2199d85a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -232,7 +232,6 @@ struct vba_vars_st {
double DISPCLKWithRampingRoundedToDFSGranularity;
double DISPCLKWithoutRampingRoundedToDFSGranularity;
double MaxDispclkRoundedToDFSGranularity;
-   double MaxDppclkRoundedToDFSGranularity;
bool DCCEnabledAnyPlane;
double ReturnBandwidthToDCN;
unsigned int TotalActiveDPP;
@@ -249,7 +248,6 @@ struct vba_vars_st {
double VBlankTime;
double SmallestVBlank;
enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal; // 
Mode Support only
-   double DCFCLKDeepSleepPerSurface[DC__NUM_DPP__MAX];
double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
double EffectiveDETPlusLBLinesLuma;
double EffectiveDETPlusLBLinesChroma;
@@ -297,7 +295,6 @@ struct vba_vars_st {
double SMNLatency;
double FCLKChangeLatency;
unsigned int MALLAllocatedForDCNFinal;
-   double DefaultGPUVMMinPageSizeKBytes; // Default for the project
double 
MaxAveragePercentOfIdealFabricBWDisplayCanUseInNormalSystemOperation;
double 
MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperationSTROBE;
double PercentOfIdealDRAMBWReceivedAfterUrgLatencySTROBE;
@@ -819,8 +816,6 @@ struct vba_vars_st {
double dummy8[DC__NUM_DPP__MAX];
double dummy13[DC__NUM_DPP__MAX];
double dummy_double_array[2][DC__NUM_DPP__MAX];
-   unsigned int dummyinteger1ms[DC__NUM_DPP__MAX];
-   double dummyinteger2ms[DC__NUM_DPP__MAX];
unsigned int dummyinteger3[DC__NUM_DPP__MAX];
unsigned int dummyinteger4[DC__NUM_DPP__MAX];
unsigned int dummyinteger5;
@@ -830,16 +825,7 @@ struct vba_vars_st {
unsigned int dummyinteger9;
unsigned int dummyinteger10;
unsigned int dummyinteger11;
-   unsigned int dummyinteger12;
-   unsigned int dummyinteger30;
-   unsigned int dummyinteger31;
-   unsigned int dummyinteger32;
-   unsigned int dummyintegerarr1[DC__NUM_DPP__MAX];
-   unsigned int dummyintegerarr2[DC__NUM_DPP__MAX];
-   unsigned int dummyintegerarr3[DC__NUM_DPP__MAX];
-   unsigned int dummyintegerarr4[DC__NUM_DPP__MAX];
unsigned int dummy_integer_array[8][DC__NUM_DPP__MAX];
-   unsigned int dummy_integer_array22[22][DC__NUM_DPP__MAX];
 
bool   dummysinglestring;
bool   SingleDPPViewportSizeSupportPerPlane[DC__NUM_DPP__MAX];
@@ -980,7 +966,6 @@ struct vba_vars_st {
double TimePerChromaMetaChunkFlip[DC__NUM_DPP__MAX];
unsigned int DCCCMaxUncompressedBlock[DC__NUM_DPP__MAX];
unsigned int DCCCMaxCompressedBlock[DC__NUM_DPP__MAX];
-   unsigned int DCCCIndependent64ByteBlock[DC__NUM_DPP__MAX];
double VStartupMargin;
bool NotEnoughTimeForDynamicMetadata[DC__NUM_DPP__MAX];
 
@@ -1085,8 +1070,6 @@ struct vba_vars_st {
double WritebackDelayTime[DC__NUM_DPP__MAX];
unsigned int 

Re: [PATCH 0/4] Fix compilation issues when using i386

2022-06-30 Thread Randy Dunlap
Hi--

On 6/30/22 11:58, Alex Deucher wrote:
> On Thu, Jun 30, 2022 at 2:46 PM Rodrigo Siqueira
>  wrote:
>>
>> Fix compilation issues when using i386
>>
>> We recently got feedback from Randy about issues in the x86-32
>> compilation. I was able to reproduce a very similar issue by using:
>>
>> - gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0
>> - make -j16 ARCH=i386
>> - amd-staging-drm-next
>>
>> I'm able to see these issues:
>>
>> ERROR: modpost: "__nedf2" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!
>> ERROR: modpost: "__muldf3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!
>> ERROR: modpost: "__floatunsidf" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] 
>> undefined!
>> ERROR: modpost: "__umoddi3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!
>>
>> This patchset addresses each compilation issue in its own patch since I
>> want to have an easy way to bisect display code in case of regressions.
>> Please, let me know if you see any issue with these changes and if it
>> fixes the 32-bit compilation. If you still see compilation issues,
>> please, report:
>>
>> - GCC version
>> - Config file
>> - Branch
>>
>> Thanks
>> Siqueira
>>
>> Rodrigo Siqueira (4):
>>   drm/amd/display: Fix __umoddi3 undefined for 32 bit compilation
>>   drm/amd/display: Fix __floatunsidf undefined for 32 bit compilation
>>   drm/amd/display: Fix __muldf3 undefined for 32 bit compilation
>>   drm/amd/display: Fix __nedf2 undefined for 32 bit compilation
> 
> Series is:
> Reviewed-by: Alex Deucher 

Series is:
Acked-by: Randy Dunlap 
Tested-by: Randy Dunlap  # build-tested

Thanks.

> 
> I think this patch is also relevant, if someone can review it as well:
> https://patchwork.freedesktop.org/patch/491429/
> 
> Alex
> 
>>
>>  .../amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c  | 15 +--
>>  .../gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c|  4 +++-
>>  drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h   |  2 +-
>>  3 files changed, 13 insertions(+), 8 deletions(-)
>>
>> --
>> 2.25.1
>>

-- 
~Randy


Re: [PATCH 5/5] libhsakmt: allocate unified memory for ctx save restore area

2022-06-30 Thread Felix Kuehling

On 2022-06-30 15:03, Eric Huang wrote:

To improve performance on queue preemption, allocate ctx s/r
  area in VRAM instead of system memory, and migrate it back
  to system memory when VRAM is full.

Signed-off-by: Eric Huang 
Change-Id: If775782027188dbe84b6868260e429373675434c
---
  include/hsakmttypes.h |   1 +
  src/queues.c  | 103 --
  2 files changed, 90 insertions(+), 14 deletions(-)

diff --git a/include/hsakmttypes.h b/include/hsakmttypes.h
index 9063f85..2c1c7cc 100644
--- a/include/hsakmttypes.h
+++ b/include/hsakmttypes.h
@@ -1329,6 +1329,7 @@ typedef enum _HSA_SVM_FLAGS {
HSA_SVM_FLAG_GPU_RO  = 0x0008, // GPUs only read, allows 
replication
HSA_SVM_FLAG_GPU_EXEC= 0x0010, // Allow execution on GPU
HSA_SVM_FLAG_GPU_READ_MOSTLY = 0x0020, // GPUs mostly read, may 
allow similar optimizations as RO, but writes fault
+   HSA_SVM_FLAG_GPU_ALWAYS_MAPPED = 0x0040, // Keep GPU memory mapping 
always valid as if XNACK is disable
  } HSA_SVM_FLAGS;
  
  typedef enum _HSA_SVM_ATTR_TYPE {

diff --git a/src/queues.c b/src/queues.c
index c83dd93..d5109f9 100644
--- a/src/queues.c
+++ b/src/queues.c
@@ -68,6 +68,7 @@ struct queue {
uint32_t eop_buffer_size;
uint32_t gfxv;
bool use_ats;
+   bool unified_ctx_save_restore;
/* This queue structure is allocated from GPU with page aligned size
 * but only small bytes are used. We use the extra space in the end for
 * cu_mask bits array.
@@ -383,13 +384,47 @@ static void free_exec_aligned_memory(void *addr, uint32_t 
size, uint32_t align,
munmap(addr, size);
  }
  
+static HSAKMT_STATUS register_svm_range(void *mem, uint32_t size,

+   uint32_t gpuNode, uint32_t prefetchNode,
+   uint32_t preferredNode, bool alwaysMapped)
+{
+   HSA_SVM_ATTRIBUTE *attrs;
+   HSAuint64 s_attr;
+   HSAuint32 nattr;
+   HSAuint32 flags;
+
+   flags = HSA_SVM_FLAG_HOST_ACCESS;
+
+   if (alwaysMapped)
+   flags |= HSA_SVM_FLAG_GPU_ALWAYS_MAPPED;
+
+   nattr = 5;
+   s_attr = sizeof(*attrs) * nattr;
+   attrs = (HSA_SVM_ATTRIBUTE *)alloca(s_attr);
+
+   attrs[0].type = HSA_SVM_ATTR_PREFETCH_LOC;
+   attrs[0].value = prefetchNode;
+   attrs[1].type = HSA_SVM_ATTR_PREFERRED_LOC;
+   attrs[1].value = preferredNode;
+   attrs[2].type = HSA_SVM_ATTR_CLR_FLAGS;
+   attrs[2].value = ~flags;
+   attrs[3].type = HSA_SVM_ATTR_SET_FLAGS;
+   attrs[3].value = flags;
+   attrs[4].type = HSA_SVM_ATTR_ACCESS;
+   attrs[4].value = gpuNode;
+
+   return hsaKmtSVMSetAttr(mem, size, nattr, attrs);
+}
+
  static void free_queue(struct queue *q)
  {
if (q->eop_buffer)
free_exec_aligned_memory(q->eop_buffer,
 q->eop_buffer_size,
 PAGE_SIZE, q->use_ats);
-   if (q->ctx_save_restore)
+   if (q->unified_ctx_save_restore)
+   free(q->ctx_save_restore);
+   else if (q->ctx_save_restore)
free_exec_aligned_memory(q->ctx_save_restore,
 q->ctx_save_restore_size,
 PAGE_SIZE, q->use_ats);
@@ -425,6 +460,8 @@ static int handle_concrete_asic(struct queue *q,
if (ret) {
uint32_t total_mem_alloc_size = 0;
HsaUserContextSaveAreaHeader *header;
+   HsaNodeProperties node;
+   bool svm_api;
  
  		args->ctx_save_restore_size = q->ctx_save_restore_size;

args->ctl_stack_size = q->ctl_stack_size;
@@ -434,22 +471,60 @@ static int handle_concrete_asic(struct queue *q,
 */
total_mem_alloc_size = q->ctx_save_restore_size +
   q->debug_memory_size;
-   q->ctx_save_restore =
-   allocate_exec_aligned_memory(total_mem_alloc_size,
-q->use_ats, NodeId, false, false);
  
-		if (!q->ctx_save_restore)

-   return HSAKMT_STATUS_NO_MEMORY;
+   if (hsaKmtGetNodeProperties(NodeId, &node))
+   svm_api = false;
+   else
+   svm_api = node.Capability.ui32.SVMAPISupported;
  
-		args->ctx_save_restore_address = (uintptr_t)q->ctx_save_restore;

+   /* Allocate unified memory for context save restore
+* area on dGPU.
+*/
+   if (!q->use_ats && svm_api) {
+   uint32_t size = PAGE_ALIGN_UP(total_mem_alloc_size);
+   void *addr;
+   HSAKMT_STATUS r = HSAKMT_STATUS_ERROR;
+
+   if (posix_memalign(&addr, GPU_HUGE_PAGE_SIZE, size))
+   pr_err("[%s] posix_memalign failed:\n", 

Re: [PATCH] drm/amd/display: Add missing hard-float compile flags for PPC64 builds

2022-06-30 Thread Guenter Roeck

On 6/30/22 14:01, Rodrigo Siqueira Jordao wrote:



On 2022-06-18 19:27, Guenter Roeck wrote:

ppc:allmodconfig builds fail with the following error.

powerpc64-linux-ld:
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o
    uses hard float,
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn31/dcn31_resource.o
    uses soft float
powerpc64-linux-ld:
failed to merge target specific data of file
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn31/dcn31_resource.o
powerpc64-linux-ld:
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o
    uses hard float,
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn315/dcn315_resource.o
    uses soft float
powerpc64-linux-ld:
failed to merge target specific data of
file drivers/gpu/drm/amd/amdgpu/../display/dc/dcn315/dcn315_resource.o
powerpc64-linux-ld:
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o
    uses hard float,
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn316/dcn316_resource.o
    uses soft float
powerpc64-linux-ld:
failed to merge target specific data of file
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn316/dcn316_resource.o

The problem was introduced with commit 41b7a347bf14 ("powerpc: Book3S
64-bit outline-only KASAN support") which adds support for KASAN. This
commit in turn enables DRM_AMD_DC_DCN because KCOV_INSTRUMENT_ALL and
KCOV_ENABLE_COMPARISONS are no longer enabled. As a result, new files are
compiled which lack the selection of hard-float.

Fixes: 41b7a347bf14 ("powerpc: Book3S 64-bit outline-only KASAN support")
Cc: Michael Ellerman 
Cc: Daniel Axtens 
Signed-off-by: Guenter Roeck 
---
  drivers/gpu/drm/amd/display/dc/dcn31/Makefile  | 4 
  drivers/gpu/drm/amd/display/dc/dcn315/Makefile | 4 
  drivers/gpu/drm/amd/display/dc/dcn316/Makefile | 4 
  3 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
index ec041e3cda30..74be02114ae4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
@@ -15,6 +15,10 @@ DCN31 = dcn31_resource.o dcn31_hubbub.o dcn31_hwseq.o 
dcn31_init.o dcn31_hubp.o
  dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \
  dcn31_afmt.o dcn31_vpg.o
+ifdef CONFIG_PPC64
+CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o := -mhard-float -maltivec
+endif
+
  AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31))
  AMD_DISPLAY_FILES += $(AMD_DAL_DCN31)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
index 59381d24800b..1395c1ced8c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
@@ -25,6 +25,10 @@
  DCN315 = dcn315_resource.o
+ifdef CONFIG_PPC64
+CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o := -mhard-float -maltivec
+endif
+
  AMD_DAL_DCN315 = $(addprefix $(AMDDALPATH)/dc/dcn315/,$(DCN315))
  AMD_DISPLAY_FILES += $(AMD_DAL_DCN315)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
index 819d44a9439b..c3d2dd78f1e2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
@@ -25,6 +25,10 @@
  DCN316 = dcn316_resource.o
+ifdef CONFIG_PPC64
+CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o := -mhard-float -maltivec
+endif
+
  AMD_DAL_DCN316 = $(addprefix $(AMDDALPATH)/dc/dcn316/,$(DCN316))
  AMD_DISPLAY_FILES += $(AMD_DAL_DCN316)


Hi,

I don't want to re-introduce those FPU flags for DCN31/DCN314/DCN316 since we 
fully isolate FPU operations for those ASICs inside the DML folder. Notice that 
we have the PPC64 in the DML Makefile:

https://gitlab.freedesktop.org/agd5f/linux/-/blob/amd-staging-drm-next/drivers/gpu/drm/amd/display/dc/dml/Makefile



Yes, sure, ppc64 is in dc/dml/Makefile. The problem is that it selects 
hard-float

ifdef CONFIG_PPC64
dml_ccflags := -mhard-float -maltivec
endif

and dc/{dcn31,dcn315,dcn316} don't.



Could you share what you see without your patch in the amd-staging-drm-next?


You mean linux-next ? Same error.

Building powerpc:allmodconfig ... failed
--
Error log:
powerpc64-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard 
float, drivers/gpu/drm/amd/amdgpu/../display/dc/dcn31/dcn31_resource.o uses 
soft float
powerpc64-linux-ld: failed to merge target specific data of file 
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn31/dcn31_resource.o
powerpc64-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard 
float, drivers/gpu/drm/amd/amdgpu/../display/dc/dcn315/dcn315_resource.o uses 
soft float
powerpc64-linux-ld: failed to merge target specific data of file 
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn315/dcn315_resource.o
powerpc64-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses 

Re: [PATCH v3] drm/amdkfd: simplify vm_validate_pt_pd_bos

2022-06-30 Thread Felix Kuehling

On 2022-06-14 05:03, Lang Yu wrote:

We don't need to validate and map root PD specially here,
it would be validated and mapped by amdgpu_vm_validate_pt_bos
if it is evicted.

The special case is when turning a GFX VM to a compute VM,
if vm_update_mode changed, we should make sure root PD gets
mapped. So just map root PD after updating vm->update_funcs
in amdgpu_vm_make_compute whether the vm_update_mode changed
or not.

v3:
  - Add some comments suggested by Christian.

v2:
  - Don't rename vm_validate_pt_pd_bos and make it public.

Signed-off-by: Lang Yu 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |  8 
  2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 50bb590c3306..c9ef242177e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -439,22 +439,8 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
return ret;
}
  
-	ret = amdgpu_amdkfd_validate_vm_bo(NULL, pd);

-   if (ret) {
-   pr_err("failed to validate PD\n");
-   return ret;
-   }
-
vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.bo);
  
-	if (vm->use_cpu_for_update) {

-   ret = amdgpu_bo_kmap(pd, NULL);
-   if (ret) {
-   pr_err("failed to kmap PD, ret=%d\n", ret);
-   return ret;
-   }
-   }
-
return 0;
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 703552f9a6d7..3a6b827e540c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2225,6 +2225,14 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm)
} else {
vm->update_funcs = &amdgpu_vm_sdma_funcs;
}
+   /*
+* Make sure root PD gets mapped. As vm_update_mode could be changed
+* when turning a GFX VM into a compute VM.
+*/
+   r = vm->update_funcs->map_table(to_amdgpu_bo_vm(vm->root.bo));
+   if (r)
+   goto unreserve_bo;
+
dma_fence_put(vm->last_update);
vm->last_update = NULL;
vm->is_compute_context = true;


Re: [PATCH] drm/amd/display: Add missing hard-float compile flags for PPC64 builds

2022-06-30 Thread Rodrigo Siqueira Jordao




On 2022-06-18 19:27, Guenter Roeck wrote:

ppc:allmodconfig builds fail with the following error.

powerpc64-linux-ld:
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o
uses hard float,
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn31/dcn31_resource.o
uses soft float
powerpc64-linux-ld:
failed to merge target specific data of file
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn31/dcn31_resource.o
powerpc64-linux-ld:
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o
uses hard float,
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn315/dcn315_resource.o
uses soft float
powerpc64-linux-ld:
failed to merge target specific data of
file drivers/gpu/drm/amd/amdgpu/../display/dc/dcn315/dcn315_resource.o
powerpc64-linux-ld:
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o
uses hard float,
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn316/dcn316_resource.o
uses soft float
powerpc64-linux-ld:
failed to merge target specific data of file
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn316/dcn316_resource.o

The problem was introduced with commit 41b7a347bf14 ("powerpc: Book3S
64-bit outline-only KASAN support") which adds support for KASAN. This
commit in turn enables DRM_AMD_DC_DCN because KCOV_INSTRUMENT_ALL and
KCOV_ENABLE_COMPARISONS are no longer enabled. As a result, new files are
compiled which lack the selection of hard-float.

Fixes: 41b7a347bf14 ("powerpc: Book3S 64-bit outline-only KASAN support")
Cc: Michael Ellerman 
Cc: Daniel Axtens 
Signed-off-by: Guenter Roeck 
---
  drivers/gpu/drm/amd/display/dc/dcn31/Makefile  | 4 
  drivers/gpu/drm/amd/display/dc/dcn315/Makefile | 4 
  drivers/gpu/drm/amd/display/dc/dcn316/Makefile | 4 
  3 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
index ec041e3cda30..74be02114ae4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
@@ -15,6 +15,10 @@ DCN31 = dcn31_resource.o dcn31_hubbub.o dcn31_hwseq.o 
dcn31_init.o dcn31_hubp.o
dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \
dcn31_afmt.o dcn31_vpg.o
  
+ifdef CONFIG_PPC64

+CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o := -mhard-float -maltivec
+endif
+
  AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31))
  
  AMD_DISPLAY_FILES += $(AMD_DAL_DCN31)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
index 59381d24800b..1395c1ced8c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
@@ -25,6 +25,10 @@
  
  DCN315 = dcn315_resource.o
  
+ifdef CONFIG_PPC64

+CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o := -mhard-float -maltivec
+endif
+
  AMD_DAL_DCN315 = $(addprefix $(AMDDALPATH)/dc/dcn315/,$(DCN315))
  
  AMD_DISPLAY_FILES += $(AMD_DAL_DCN315)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
index 819d44a9439b..c3d2dd78f1e2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
@@ -25,6 +25,10 @@
  
  DCN316 = dcn316_resource.o
  
+ifdef CONFIG_PPC64

+CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o := -mhard-float -maltivec
+endif
+
  AMD_DAL_DCN316 = $(addprefix $(AMDDALPATH)/dc/dcn316/,$(DCN316))
  
  AMD_DISPLAY_FILES += $(AMD_DAL_DCN316)


Hi,

I don't want to re-introduce those FPU flags for DCN31/DCN314/DCN316 
since we fully isolate FPU operations for those ASICs inside the DML 
folder. Notice that we have the PPC64 in the DML Makefile:


https://gitlab.freedesktop.org/agd5f/linux/-/blob/amd-staging-drm-next/drivers/gpu/drm/amd/display/dc/dml/Makefile

Could you share what you see without your patch in the 
amd-staging-drm-next? Also:

* Are you using cross-compilation? If so, could you share your setup?
* Which GCC/Clang version are you using?

Thanks
Siqueira



Re: [PATCH v7 04/14] mm: add device coherent vma selection for memory migration

2022-06-30 Thread David Hildenbrand
On 30.06.22 13:44, Alistair Popple wrote:
> 
> David Hildenbrand  writes:
> 
>> On 29.06.22 05:54, Alex Sierra wrote:
>>> This case is used to migrate pages from device memory, back to system
>>> memory. Device coherent type memory is cache coherent from device and CPU
>>> point of view.
>>>
>>> Signed-off-by: Alex Sierra 
>>> Acked-by: Felix Kuehling 
>>> Reviewed-by: Alistair Popple 
>>> Signed-off-by: Christoph Hellwig 
>>
>>
>> I'm not too familiar with this code, please excuse my naive questions:
>>
>>> @@ -148,15 +148,21 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>>> if (is_writable_device_private_entry(entry))
>>> mpfn |= MIGRATE_PFN_WRITE;
>>> } else {
>>> -   if (!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
>>> -   goto next;
>>
>> Why not exclude MIGRATE_VMA_SELECT_DEVICE_PRIVATE here? IIRC that would
>> have happened before this change.
> 
> I might be missing something as I don't quite follow - this path is for
> normal system pages so we only want to skip selecting them if
> MIGRATE_VMA_SELECT_SYSTEM or MIGRATE_VMA_SELECT_DEVICE_COHERENT aren't
> set.
> 
> Note that MIGRATE_VMA_SELECT_DEVICE_PRIVATE doesn't apply here because
> we already know it's not a device private page by virtue of
> pte_present(pte) == True.

Ah, stupid me, pte_present(pte) is the key.

> 
>>> pfn = pte_pfn(pte);
>>> -   if (is_zero_pfn(pfn)) {
>>> +   if (is_zero_pfn(pfn) &&
>>> +   (migrate->flags & MIGRATE_VMA_SELECT_SYSTEM)) {
>>> mpfn = MIGRATE_PFN_MIGRATE;
>>> migrate->cpages++;
>>> goto next;
>>> }
>>> page = vm_normal_page(migrate->vma, addr, pte);
>>> +   if (page && !is_zone_device_page(page) &&
>>
>> I'm wondering if that check logically belongs into patch #2.
> 
> I don't think so as it would break functionality until the below
> conditionals are added - we explicitly don't want to skip
> is_zone_device_page(page) == False here because that is the pages we are
> trying to select.
> 
> You could add in this:
> 
>>> +   !(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
> 
> But then in patch 2 we know this can never be true because we've already
> checked for !MIGRATE_VMA_SELECT_SYSTEM there.


Ah, okay, thanks

Reviewed-by: David Hildenbrand 

-- 
Thanks,

David / dhildenb



Re: [PATCH] drm/amdgpu/display: add missing FP_START/END checks dcn32_clk_mgr.c

2022-06-30 Thread Rodrigo Siqueira Jordao




On 2022-06-27 17:04, Alex Deucher wrote:

Properly handle FP code in dcn32_clk_mgr.c.

Fixes: 265280b99822 ("drm/amd/display: add CLKMGR changes for DCN32/321")
Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index 4e8059f20007..72bbe7f18f5d 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -288,8 +288,10 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base)
/* Get UCLK, update bounding box */
clk_mgr_base->funcs->get_memclk_states_from_smu(clk_mgr_base);
  
+	DC_FP_START();

/* WM range table */
dcn32_build_wm_range_table(clk_mgr);
+   DC_FP_END();
  }
  
  static void dcn32_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr,

@@ -724,9 +726,11 @@ static void dcn32_get_memclk_states_from_smu(struct 
clk_mgr *clk_mgr_base)
&num_levels);
clk_mgr_base->bw_params->clk_table.num_entries = num_levels ? 
num_levels : 1;
  
+	DC_FP_START();

/* Refresh bounding box */
clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box(
clk_mgr->base.ctx->dc, clk_mgr_base->bw_params);
+   DC_FP_END();
  }
  
  static bool dcn32_are_clock_states_equal(struct dc_clocks *a,


Hi Alex,

Reviewed-by: Rodrigo Siqueira 

Btw, I have already started working on the FPU isolation for DCN32/321.

Thanks
Siqueira





[PATCH] drm/amd/display: Remove duplicate code across dcn30 and dcn31

2022-06-30 Thread Maíra Canal
The function CalculateBytePerPixelAnd256BBlockSizes was defined four
times: in display_mode_vba_30.c, display_rq_dlg_calc_30.c,
display_mode_vba_31.c and display_rq_dlg_calc_31.c. In order to avoid
code duplication, CalculateBytePerPixelAnd256BBlockSizes is now declared
in display_mode_vba_30.h and shared across dcn30 and dcn31.

Signed-off-by: Maíra Canal 
---
 .../dc/dml/dcn30/display_mode_vba_30.c|  21 +---
 .../dc/dml/dcn30/display_mode_vba_30.h|  11 ++
 .../dc/dml/dcn30/display_rq_dlg_calc_30.c |  93 +--
 .../dc/dml/dcn31/display_mode_vba_31.c| 106 +-
 .../dc/dml/dcn31/display_rq_dlg_calc_31.c |  91 +--
 5 files changed, 23 insertions(+), 299 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
index fb4aa4c800bf..842eb94ebe04 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
@@ -712,18 +712,6 @@ static double CalculateUrgentLatency(
double UrgentLatencyAdjustmentFabricClockReference,
double FabricClockSingle);
 
-static bool CalculateBytePerPixelAnd256BBlockSizes(
-   enum source_format_class SourcePixelFormat,
-   enum dm_swizzle_mode SurfaceTiling,
-   unsigned int *BytePerPixelY,
-   unsigned int *BytePerPixelC,
-   double   *BytePerPixelDETY,
-   double   *BytePerPixelDETC,
-   unsigned int *BlockHeight256BytesY,
-   unsigned int *BlockHeight256BytesC,
-   unsigned int *BlockWidth256BytesY,
-   unsigned int *BlockWidth256BytesC);
-
 void dml30_recalculate(struct display_mode_lib *mode_lib)
 {
ModeSupportAndSystemConfiguration(mode_lib);
@@ -2095,7 +2083,7 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
DTRACE("   return_bus_bw  = %f", v->ReturnBW);
 
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
-   CalculateBytePerPixelAnd256BBlockSizes(
+   dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&v->BytePerPixelY[k],
@@ -3165,7 +3153,7 @@ static void DisplayPipeConfiguration(struct 
display_mode_lib *mode_lib)
 
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
 
-   CalculateBytePerPixelAnd256BBlockSizes(
+   dml30_CalculateBytePerPixelAnd256BBlockSizes(
mode_lib->vba.SourcePixelFormat[k],
mode_lib->vba.SurfaceTiling[k],
[k],
@@ -3218,7 +3206,7 @@ static void DisplayPipeConfiguration(struct 
display_mode_lib *mode_lib)
);
 }
 
-static bool CalculateBytePerPixelAnd256BBlockSizes(
+void dml30_CalculateBytePerPixelAnd256BBlockSizes(
enum source_format_class SourcePixelFormat,
enum dm_swizzle_mode SurfaceTiling,
unsigned int *BytePerPixelY,
@@ -3305,7 +3293,6 @@ static bool CalculateBytePerPixelAnd256BBlockSizes(
*BlockWidth256BytesY = 256U / *BytePerPixelY / 
*BlockHeight256BytesY;
*BlockWidth256BytesC = 256U / *BytePerPixelC / 
*BlockHeight256BytesC;
}
-   return true;
 }
 
 static double CalculateTWait(
@@ -3709,7 +3696,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
/*Bandwidth Support Check*/
 
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
-   CalculateBytePerPixelAnd256BBlockSizes(
+   dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&v->BytePerPixelY[k],
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h
index 4e249eaabfdb..daaf0883b84d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h
@@ -39,5 +39,16 @@ double dml30_CalculateWriteBackDISPCLK(
long   WritebackDestinationWidth,
unsigned int HTotal,
unsigned int WritebackLineBufferSize);
+void dml30_CalculateBytePerPixelAnd256BBlockSizes(
+   enum source_format_class SourcePixelFormat,
+   enum dm_swizzle_mode SurfaceTiling,
+   unsigned int *BytePerPixelY,
+   unsigned int *BytePerPixelC,
+   double   *BytePerPixelDETY,
+   double   *BytePerPixelDETC,
+   unsigned int *BlockHeight256BytesY,
+   unsigned int 

Re: [PATCH] drm/amd: Add debug mask for subviewport mclk switch

2022-06-30 Thread Alex Deucher
On Thu, Jun 30, 2022 at 3:28 PM Aurabindo Pillai
 wrote:
>
>
>
> On 2022-06-30 14:28, Alex Deucher wrote:
> > On Tue, Jun 28, 2022 at 5:26 PM Aurabindo Pillai
> >  wrote:
> >>
> >> [Why]
> >> Expose a new debugfs enum to force a subviewport memory clock switch
> >> to facilitate easy testing.
> >>
> >
> > Is the debugfs support already plumbed in and this just enables you to
> > use it?  If it's in debugfs, do we really need a module parameter to
> > enable it or can it just be enabled by default and used via debugfs
> > when needed?
>
> Sorry, I meant to put in debug mask, like in the title. Will fix that.
> There is no debugfs associated with this change. The change in DM is
> enough to enable this.
> >

Thanks.  With that fixed,
Acked-by: Alex Deucher 

> > Alex
> >
> >> Signed-off-by: Aurabindo Pillai 
> >> ---
> >>   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++
> >>   drivers/gpu/drm/amd/include/amd_shared.h  | 1 +
> >>   2 files changed, 4 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> >> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >> index c9145864ed2b..7a034ca95be2 100644
> >> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >> @@ -1559,6 +1559,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
> >>  if (amdgpu_dc_debug_mask & DC_DISABLE_CLOCK_GATING)
> >>  adev->dm.dc->debug.disable_clock_gate = true;
> >>
> >> +   if (amdgpu_dc_debug_mask & DC_FORCE_SUBVP_MCLK_SWITCH)
> >> +   adev->dm.dc->debug.force_subvp_mclk_switch = true;
> >> +
> >>  r = dm_dmub_hw_init(adev);
> >>  if (r) {
> >>  DRM_ERROR("DMUB interface failed to initialize: 
> >> status=%d\n", r);
> >> diff --git a/drivers/gpu/drm/amd/include/amd_shared.h 
> >> b/drivers/gpu/drm/amd/include/amd_shared.h
> >> index bcdf7453a403..b1c55dd7b498 100644
> >> --- a/drivers/gpu/drm/amd/include/amd_shared.h
> >> +++ b/drivers/gpu/drm/amd/include/amd_shared.h
> >> @@ -247,6 +247,7 @@ enum DC_DEBUG_MASK {
> >>  DC_DISABLE_DSC = 0x4,
> >>  DC_DISABLE_CLOCK_GATING = 0x8,
> >>  DC_DISABLE_PSR = 0x10,
> >> +   DC_FORCE_SUBVP_MCLK_SWITCH = 0x20,
> >>   };
> >>
> >>   enum amd_dpm_forced_level;
> >> --
> >> 2.36.1
> >>


Re: [PATCH] drm/amd: Add debug mask for subviewport mclk switch

2022-06-30 Thread Aurabindo Pillai




On 2022-06-30 14:28, Alex Deucher wrote:

On Tue, Jun 28, 2022 at 5:26 PM Aurabindo Pillai
 wrote:


[Why]
Expose a new debugfs enum to force a subviewport memory clock switch
to facilitate easy testing.



Is the debugfs support already plumbed in and this just enables you to
use it?  If it's in debugfs, do we really need a module parameter to
enable it or can it just be enabled by default and used via debugfs
when needed?


Sorry, I meant to put in debug mask, like in the title. Will fix that. 
There is no debugfs associated with this change. The change in DM is 
enough to enable this.


Alex


Signed-off-by: Aurabindo Pillai 
---
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++
  drivers/gpu/drm/amd/include/amd_shared.h  | 1 +
  2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index c9145864ed2b..7a034ca95be2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1559,6 +1559,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 if (amdgpu_dc_debug_mask & DC_DISABLE_CLOCK_GATING)
 adev->dm.dc->debug.disable_clock_gate = true;

+   if (amdgpu_dc_debug_mask & DC_FORCE_SUBVP_MCLK_SWITCH)
+   adev->dm.dc->debug.force_subvp_mclk_switch = true;
+
 r = dm_dmub_hw_init(adev);
 if (r) {
 DRM_ERROR("DMUB interface failed to initialize: status=%d\n", 
r);
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h 
b/drivers/gpu/drm/amd/include/amd_shared.h
index bcdf7453a403..b1c55dd7b498 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -247,6 +247,7 @@ enum DC_DEBUG_MASK {
 DC_DISABLE_DSC = 0x4,
 DC_DISABLE_CLOCK_GATING = 0x8,
 DC_DISABLE_PSR = 0x10,
+   DC_FORCE_SUBVP_MCLK_SWITCH = 0x20,
  };

  enum amd_dpm_forced_level;
--
2.36.1



[PATCH 40/40] drm/amd/display: 3.2.192

2022-06-30 Thread Rodrigo Siqueira
From: Aric Cyr 

This DC patchset brings improvements in multiple areas. In summary, we
highlight:

- Program ACP-related registers
- Fixes for DMUB, DPIA, PSR, and others
- Improvements in the pipe split
- Add SubVP code
- Add basic setup for FAMS support
- Improve BB capabilities

Acked-by: Rodrigo Siqueira 
Signed-off-by: Aric Cyr 
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index a9c59669dd1d..02bbc90a2c80 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -47,7 +47,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.191"
+#define DC_VER "3.2.192"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
2.25.1



[PATCH 36/40] drm/amd/display: OVT Update on InfoFrame and Mode Management

2022-06-30 Thread Rodrigo Siqueira
From: Chris Park 

[Why]
Integrate OVT timing from DM to DC logic to update info frame
and mode management to report the resolution to the OS.

[How]
Reflect RID and Frame Rate to AVI InfoFrame Version 5.
Define new Timing Standard for OVT timing.

Reviewed-by: Charlene Liu 
Acked-by: Alan Liu 
Signed-off-by: Chris Park 
---
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c| 11 +++
 drivers/gpu/drm/amd/display/dc/dc_hw_types.h |  2 ++
 drivers/gpu/drm/amd/display/dc/dc_types.h|  2 ++
 drivers/gpu/drm/amd/display/include/set_mode_types.h |  8 ++--
 4 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 85cbea4dbf72..2aa42c710488 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -2514,6 +2514,8 @@ static void set_avi_info_frame(
union hdmi_info_packet hdmi_info;
union display_content_support support = {0};
unsigned int vic = pipe_ctx->stream->timing.vic;
+   unsigned int rid = pipe_ctx->stream->timing.rid;
+   unsigned int fr_ind = pipe_ctx->stream->timing.fr_index;
enum dc_timing_3d_format format;
 
memset(&hdmi_info, 0, sizeof(union hdmi_info_packet));
@@ -2706,6 +2708,15 @@ static void set_avi_info_frame(
hdmi_info.bits.header.length = 14;
}
 
+   if (rid != 0 && fr_ind != 0) {
+   hdmi_info.bits.header.version = 5;
+   hdmi_info.bits.header.length = 15;
+
+   hdmi_info.bits.FR0_FR3 = fr_ind & 0xF;
+   hdmi_info.bits.FR4 = (fr_ind >> 4) & 0x1;
+   hdmi_info.bits.RID0_RID5 = rid;
+   }
+
/* pixel repetition
 * PR0 - PR3 start from 0 whereas pHwPathMode->mode.timing.flags.pixel
 * repetition start from 1 */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h 
b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index d75416dc9fae..584aaf6967fd 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -784,6 +784,8 @@ struct dc_crtc_timing {
 
uint32_t vic;
uint32_t hdmi_vic;
+   uint32_t rid;
+   uint32_t fr_index;
enum dc_timing_3d_format timing_3d_format;
enum dc_color_depth display_color_depth;
enum dc_pixel_encoding pixel_encoding;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h 
b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 084074a23031..7e595310a4b8 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -280,6 +280,8 @@ enum dc_timing_source {
TIMING_SOURCE_EDID_CEA_SVD,
TIMING_SOURCE_EDID_CVT_3BYTE,
TIMING_SOURCE_EDID_4BYTE,
+   TIMING_SOURCE_EDID_CEA_DISPLAYID_VTDB,
+   TIMING_SOURCE_EDID_CEA_RID,
TIMING_SOURCE_VBIOS,
TIMING_SOURCE_CV,
TIMING_SOURCE_TV,
diff --git a/drivers/gpu/drm/amd/display/include/set_mode_types.h 
b/drivers/gpu/drm/amd/display/include/set_mode_types.h
index 845fea8a387f..75f2c79492c0 100644
--- a/drivers/gpu/drm/amd/display/include/set_mode_types.h
+++ b/drivers/gpu/drm/amd/display/include/set_mode_types.h
@@ -84,10 +84,14 @@ union hdmi_info_packet {
uint16_t bar_left;
uint16_t bar_right;
 
-   uint8_t F140_F143:4;
+   uint8_t FR0_FR3:4;
uint8_t ACE0_ACE3:4;
 
-   uint8_t reserved[13];
+   uint8_t RID0_RID5:6;
+   uint8_t FR4:1;
+   uint8_t F157:1;
+
+   uint8_t reserved[12];
} bits;
 
struct info_packet_raw_data packet_raw_data;
-- 
2.25.1



[PATCH 39/40] drm/amd/display: Maintain old audio programming sequence

2022-06-30 Thread Rodrigo Siqueira
From: Alvin Lee 

[Description]
Program audio DTO before wall dto for audio

Reviewed-by: Martin Leung 
Acked-by: Qingqing Zhuo 
Signed-off-by: Alvin Lee 
---
 .../gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c   | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 76e1e0966064..e69c942c8345 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -2171,16 +2171,14 @@ static void dce110_setup_audio_dto(
if (dc->res_pool->dccg && 
dc->res_pool->dccg->funcs->set_audio_dtbclk_dto) {
struct dtbclk_dto_params dto_params = {0};
 
+   dc->res_pool->dccg->funcs->set_audio_dtbclk_dto(
+   dc->res_pool->dccg, &dto_params);
+

pipe_ctx->stream_res.audio->funcs->wall_dto_setup(
pipe_ctx->stream_res.audio,
pipe_ctx->stream->signal,
&audio_output.crtc_info,
&audio_output.pll_info);
-
-   /* disable audio DTBCLK DTO */
-   dc->res_pool->dccg->funcs->set_audio_dtbclk_dto(
-   dc->res_pool->dccg, &dto_params);
-
} else

pipe_ctx->stream_res.audio->funcs->wall_dto_setup(
pipe_ctx->stream_res.audio,
-- 
2.25.1



[PATCH 35/40] drm/amd/display: Initialize lt_settings on instantiation

2022-06-30 Thread Rodrigo Siqueira
From: Michael Strauss 

[WHY]
lt_settings' pointers remain uninitialized but nonzero if display fails
to light up with no DPCD/EDID info populated, leading to a hang on access

Reviewed-by: Nicholas Kazlauskas 
Acked-by: Alan Liu 
Signed-off-by: Michael Strauss 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
index 3b8acda50701..468e39589ed8 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
@@ -960,7 +960,7 @@ enum link_training_result 
dc_link_dpia_perform_link_training(
bool skip_video_pattern)
 {
enum link_training_result result;
-   struct link_training_settings lt_settings;
+   struct link_training_settings lt_settings = {0};
uint8_t repeater_cnt = 0; /* Number of hops/repeaters in display path. 
*/
int8_t repeater_id; /* Current hop. */
 
-- 
2.25.1



[PATCH 37/40] drm/amd/display: enable PCON SST support for newer ASICs

2022-06-30 Thread Rodrigo Siqueira
From: Hamza Mahfooz 

Generic PCON SST support already exists and works for newer ASICs. So,
enable it by default.

Acked-by: Rodrigo Siqueira 
Signed-off-by: Hamza Mahfooz 
---
 drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c | 1 +
 drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c | 1 +
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c   | 1 +
 drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 1 +
 4 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
index 4f45753484fe..e346a00f395f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
@@ -1768,6 +1768,7 @@ static bool dcn315_resource_construct(
dc->caps.post_blend_color_processing = true;
dc->caps.force_dp_tps4_for_cp2520 = true;
dc->caps.dp_hpo = true;
+   dc->caps.hdmi_frl_pcon_support = true;
dc->caps.edp_dsc_support = true;
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
index f9cee05aeccc..1b170e2d546b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
@@ -1770,6 +1770,7 @@ static bool dcn316_resource_construct(
dc->caps.post_blend_color_processing = true;
dc->caps.force_dp_tps4_for_cp2520 = true;
dc->caps.dp_hpo = true;
+   dc->caps.hdmi_frl_pcon_support = true;
dc->caps.edp_dsc_support = true;
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index e9ecc27a51de..65de38b96312 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -4010,6 +4010,7 @@ static bool dcn32_resource_construct(
dc->caps.post_blend_color_processing = true;
dc->caps.force_dp_tps4_for_cp2520 = true;
dc->caps.dp_hpo = true;
+   dc->caps.hdmi_frl_pcon_support = true;
dc->caps.edp_dsc_support = true;
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
index 81027b780d15..f38b16335086 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
@@ -2305,6 +2305,7 @@ static bool dcn321_resource_construct(
dc->caps.post_blend_color_processing = true;
dc->caps.force_dp_tps4_for_cp2520 = true;
dc->caps.dp_hpo = true;
+   dc->caps.hdmi_frl_pcon_support = true;
dc->caps.edp_dsc_support = true;
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
-- 
2.25.1



[PATCH 38/40] drm/amd/display: rename hdmi_frl_pcon_support

2022-06-30 Thread Rodrigo Siqueira
From: Hamza Mahfooz 

hdmi_frl_pcon_support has been the source of confusion. So, rename it to
dp_hdmi21_pcon_support.

Signed-off-by: Hamza Mahfooz 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c| 2 +-
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c   | 2 +-
 drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c   | 2 +-
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c   | 2 +-
 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c   | 2 +-
 drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c | 2 +-
 drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c | 2 +-
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c   | 2 +-
 drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 07e5b316fbde..a126921c664a 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -4890,7 +4890,7 @@ static void get_active_converter_info(

hdmi_color_caps.bits.MAX_BITS_PER_COLOR_COMPONENT);
 
 #if defined(CONFIG_DRM_AMD_DC_DCN)
-   if 
(link->dc->caps.hdmi_frl_pcon_support) {
+   if 
(link->dc->caps.dp_hdmi21_pcon_support) {
union hdmi_encoded_link_bw 
hdmi_encoded_link_bw;
 

link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps =
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index b9a23e6b3696..a9c59669dd1d 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -203,7 +203,7 @@ struct dc_caps {
struct dc_color_caps color;
struct dc_dmub_caps dmub_caps;
bool dp_hpo;
-   bool hdmi_frl_pcon_support;
+   bool dp_hdmi21_pcon_support;
bool edp_dsc_support;
bool vbios_lttpr_aware;
bool vbios_lttpr_enable;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index a0a94ea58a57..1483de85a524 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -2463,7 +2463,7 @@ static bool dcn20_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
 
-   dc->caps.hdmi_frl_pcon_support = true;
+   dc->caps.dp_hdmi21_pcon_support = true;
 
if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) {
dc->debug = debug_defaults_drv;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
index faab59508d82..d95875952fba 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
@@ -1490,7 +1490,7 @@ static bool dcn21_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
 
-   dc->caps.hdmi_frl_pcon_support = true;
+   dc->caps.dp_hdmi21_pcon_support = true;
 
if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
dc->debug = debug_defaults_drv;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
index b74d5f3f0472..4f684235a413 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
@@ -2379,7 +2379,7 @@ static bool dcn30_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
 
-   dc->caps.hdmi_frl_pcon_support = true;
+   dc->caps.dp_hdmi21_pcon_support = true;
 
/* read VBIOS LTTPR caps */
{
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 1a67d04cc017..16bbccc69fdc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -1898,7 +1898,7 @@ static bool dcn31_resource_construct(
dc->caps.post_blend_color_processing = true;
dc->caps.force_dp_tps4_for_cp2520 = true;
dc->caps.dp_hpo = true;
-   dc->caps.hdmi_frl_pcon_support = true;
+   dc->caps.dp_hdmi21_pcon_support = true;
dc->caps.edp_dsc_support = true;
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
index e346a00f395f..df2abd8fe2eb 100644
--- 

[PATCH 30/40] drm/amd/display: Disable TBT3 DSC work around by default.

2022-06-30 Thread Rodrigo Siqueira
From: Jimmy Kizito 

[Why]
Some TBT3 docks have DPOAs which report USB4 capability and are expected
to support USB4 DPOA features such as FEC/DSC.

[How]
By default, do not override FEC/DSC capabilities reported by TBT3 docks.

Reviewed-by: Meenakshikumar Somasundaram 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Jimmy Kizito 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +-
 drivers/gpu/drm/amd/display/dc/dc.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index e0660e7356c9..07e5b316fbde 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -5553,7 +5553,7 @@ static bool retrieve_link_cap(struct dc_link *link)
 * only if required.
 */
if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
-   
!link->dc->debug.dpia_debug.bits.disable_force_tbt3_work_around &&
+   
link->dc->debug.dpia_debug.bits.enable_force_tbt3_work_around &&
link->dpcd_caps.is_branch_dev &&
link->dpcd_caps.branch_dev_id == 
DP_BRANCH_DEVICE_ID_90CC24 &&
link->dpcd_caps.branch_hw_revision == 
DP_BRANCH_HW_REV_10 &&
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 30379e5ff898..337bbd4c6642 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -542,7 +542,7 @@ union dpia_debug_options {
uint32_t force_non_lttpr:1; /* bit 1 */
uint32_t extend_aux_rd_interval:1; /* bit 2 */
uint32_t disable_mst_dsc_work_around:1; /* bit 3 */
-   uint32_t disable_force_tbt3_work_around:1; /* bit 4 */
+   uint32_t enable_force_tbt3_work_around:1; /* bit 4 */
uint32_t reserved:27;
} bits;
uint32_t raw;
-- 
2.25.1



[PATCH 19/40] drm/amd/display: Don't set dram clock change requirement for SubVP

2022-06-30 Thread Rodrigo Siqueira
From: Alvin Lee 

[Description]
In general cases we want to keep the dram clock change requirement (we
prefer configs that support MCLK switch). Only override to false for
SubVP.

Reviewed-by: Jun Lei 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Alvin Lee 
---
 drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h | 1 +
 drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index b5d7e251ed81..87c9b9f9976e 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -184,6 +184,7 @@ struct _vcs_dpi_soc_bounding_box_st {
double max_avg_fabric_bw_use_normal_percent;
double max_avg_dram_bw_use_normal_strobe_percent;
enum dm_prefetch_modes allow_for_pstate_or_stutter_in_vblank_final;
+   bool dram_clock_change_requirement_final;
double writeback_latency_us;
double ideal_dram_bw_after_urgent_percent;
double pct_ideal_dram_sdp_bw_after_urgent_pixel_only; // 
PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index 5185c2ccdfd5..95edca4c085b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -343,7 +343,7 @@ static void fetch_socbb_params(struct display_mode_lib 
*mode_lib)

mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperationSTROBE
 =
soc->max_avg_dram_bw_use_normal_strobe_percent;
 
-   mode_lib->vba.DRAMClockChangeRequirementFinal = 1;
+   mode_lib->vba.DRAMClockChangeRequirementFinal = 
soc->dram_clock_change_requirement_final;
mode_lib->vba.FCLKChangeRequirementFinal = 1;
mode_lib->vba.USRRetrainingRequiredFinal = 1;
mode_lib->vba.ConfigurableDETSizeEnFinal = 0;
-- 
2.25.1



[PATCH 24/40] drm/amd/display: Guard against NULL link encoder in log hw state

2022-06-30 Thread Rodrigo Siqueira
From: Nicholas Kazlauskas 

[Why & How]
Check lenc is not NULL since dynamic link encoder assignment could
end up assigning a NULL link encoder.

Reviewed-by: Michael Strauss 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Nicholas Kazlauskas 
---
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index aaa0bf321bce..029deb81fcfa 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -444,7 +444,7 @@ void dcn10_log_hw_state(struct dc *dc,
 
struct link_enc_state s = {0};
 
-   if (lenc->funcs->read_state) {
+   if (lenc && lenc->funcs->read_state) {
lenc->funcs->read_state(lenc, &s);
DTN_INFO("[%-3d]: %-12d %-22d %-22d %-25d\n",
i,
-- 
2.25.1



[PATCH 28/40] drm/amd/display: Remove configuration option for dpia hpd delay

2022-06-30 Thread Rodrigo Siqueira
From: Meenakshikumar Somasundaram 

[Why]
DC debug option to configure dpia hpd processing delay is not required.

[How]
Remove the dc debug option for dpia hpd delay and also add a log for
querying the dpia hpd state.

Reviewed-by: Mustapha Ghaddar 
Reviewed-by: Jun Lei 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Meenakshikumar Somasundaram 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c | 7 +++
 drivers/gpu/drm/amd/display/dc/dc.h| 5 ++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
index 03f7249df1ef..c8610a5dedfa 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
@@ -85,6 +85,13 @@ bool dc_link_dpia_query_hpd_status(struct dc_link *link)
if (dc_dmub_srv_cmd_with_reply_data(dmub_srv, &cmd) && 
cmd.query_hpd.data.status == AUX_RET_SUCCESS)
is_hpd_high = cmd.query_hpd.data.result;
 
+   DC_LOG_DEBUG("%s: link(%d) dpia(%d) cmd_status(%d) result(%d)\n",
+   __func__,
+   link->link_index,
+   link->link_id.enum_id - ENUM_ID_1,
+   cmd.query_hpd.data.status,
+   cmd.query_hpd.data.result);
+
return is_hpd_high;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 6674edf69b87..30379e5ff898 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -542,9 +542,8 @@ union dpia_debug_options {
uint32_t force_non_lttpr:1; /* bit 1 */
uint32_t extend_aux_rd_interval:1; /* bit 2 */
uint32_t disable_mst_dsc_work_around:1; /* bit 3 */
-   uint32_t hpd_delay_in_ms:12; /* bits 4-15 */
-   uint32_t disable_force_tbt3_work_around:1; /* bit 16 */
-   uint32_t reserved:15;
+   uint32_t disable_force_tbt3_work_around:1; /* bit 4 */
+   uint32_t reserved:27;
} bits;
uint32_t raw;
 };
-- 
2.25.1



[PATCH 21/40] drm/amd/display: Program ACP related register

2022-06-30 Thread Rodrigo Siqueira
From: Alan Liu 

- Setup the shift and mask of HDMI_ACP_SEND register
- Program the register in hdmi stream encoder
- Also update ACP register in azalia configuration

Reviewed-by: Harry Wentland 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Alan Liu 
---
 drivers/gpu/drm/amd/display/dc/dce/dce_audio.c   | 11 +++
 .../drm/amd/display/dc/dce/dce_stream_encoder.c  |  3 ++-
 .../drm/amd/display/dc/dce/dce_stream_encoder.h  | 14 ++
 .../amd/display/dc/dcn10/dcn10_stream_encoder.c  |  3 ++-
 .../amd/display/dc/dcn10/dcn10_stream_encoder.h  | 16 ++--
 .../amd/display/dc/dcn20/dcn20_stream_encoder.c  |  1 -
 .../display/dc/dcn30/dcn30_dio_stream_encoder.c  |  4 +++-
 .../display/dc/dcn30/dcn30_dio_stream_encoder.h  |  6 ++
 .../drm/amd/display/dc/inc/hw/stream_encoder.h   |  1 +
 .../amd/include/asic_reg/dce/dce_6_0_sh_mask.h   |  2 ++
 10 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c 
b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
index 70eaac017624..c06888add4a0 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
@@ -486,6 +486,17 @@ void dce_aud_az_configure(
 
AZ_REG_WRITE(AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, value);
 
+   /*  ACP Data - Supports AI  */
+   value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_ACP_DATA);
+
+   set_reg_field_value(
+   value,
+   audio_info->flags.info.SUPPORT_AI,
+   AZALIA_F0_CODEC_PIN_CONTROL_ACP_DATA,
+   SUPPORTS_AI);
+
+   AZ_REG_WRITE(AZALIA_F0_CODEC_PIN_CONTROL_ACP_DATA, value);
+
/*  Audio Descriptors   */
/* pass through all formats */
for (format_index = 0; format_index < AUDIO_FORMAT_CODE_COUNT;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c 
b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
index a8c92b517df1..f7e1027d4b3a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
@@ -33,7 +33,6 @@
 #define DC_LOGGER \
enc110->base.ctx->logger
 
-
 #define REG(reg)\
(enc110->regs->reg)
 
@@ -635,6 +634,8 @@ static void dce110_stream_encoder_hdmi_set_stream_attribute(
HDMI_GC_SEND, 1,
HDMI_NULL_SEND, 1);
 
+   REG_UPDATE(HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, 0);
+
/* following belongs to audio */
REG_UPDATE(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h 
b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h
index f9cdf2b5242c..cc5020a8e1e1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h
@@ -115,7 +115,7 @@
 #define SE_SF(reg_name, field_name, post_fix)\
.field_name = reg_name ## __ ## field_name ## post_fix
 
-#define SE_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh)\
+#define SE_COMMON_MASK_SH_LIST_DCE_COMMON(mask_sh)\
SE_SF(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_INDEX, mask_sh),\
SE_SF(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC0_UPDATE, mask_sh),\
SE_SF(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC2_UPDATE, mask_sh),\
@@ -140,6 +140,7 @@
SE_SF(HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\
SE_SF(HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\
SE_SF(HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\
+   SE_SF(HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, mask_sh),\
SE_SF(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\
SE_SF(AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, mask_sh),\
SE_SF(HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\
@@ -202,10 +203,7 @@
SE_SF(AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, mask_sh),\
SE_SF(DIG_FE_CNTL, DIG_SOURCE_SELECT, mask_sh)
 
-#define SE_COMMON_MASK_SH_LIST_DCE_COMMON(mask_sh)\
-   SE_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh)
-
-#define SE_COMMON_MASK_SH_LIST_SOC_BASE(mask_sh)\
+#define SE_COMMON_MASK_SH_LIST_SOC(mask_sh)\
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_INDEX, mask_sh),\
SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB0, mask_sh),\
SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB1, mask_sh),\
@@ -227,6 +225,7 @@
SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\
SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\
SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\
+   SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, mask_sh),\
SE_SF(DIG0_HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\
SE_SF(DIG0_AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, mask_sh),\
SE_SF(DIG0_HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\
@@ -288,9 +287,6 @@
SE_SF(DIG0_DIG_FE_CNTL, 

[PATCH 32/40] drm/amd/display: Fix null timing generator resource

2022-06-30 Thread Rodrigo Siqueira
From: Eric Bernstein 

[Why]
For some customer blending transition cases, the
available pipe for second stream is a pipe index that is
greater than the number of timing generators, which
can cause a problem in acquire_first_free_pipe since it
assumes same index for pipe and timing generator

[How]
Add logic to use the last timing generator index if the pipe index
is greater than or equal to the number of timing generators.

Acked-by: Alan Liu 
Signed-off-by: Eric Bernstein 
---
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 3d45f6cae1f7..f7b47bf3ee59 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -1885,6 +1885,12 @@ static int acquire_first_free_pipe(
pipe_ctx->plane_res.mpcc_inst = 
pool->dpps[i]->inst;
pipe_ctx->pipe_idx = i;
 
+   if (i >= pool->timing_generator_count) {
+   int tg_inst = pool->timing_generator_count - 1;
+
+   pipe_ctx->stream_res.tg = 
pool->timing_generators[tg_inst];
+   pipe_ctx->stream_res.opp = pool->opps[tg_inst];
+   }
 
pipe_ctx->stream = stream;
return i;
-- 
2.25.1



[PATCH 25/40] drm/amd/display: Fix dmub soft hang for PSR 1

2022-06-30 Thread Rodrigo Siqueira
From: Fangzhi Zuo 

[Why]
An unexpected change of the aux hw mapping causes a dmub soft hang when
an aux transaction is initiated on the wrong aux channel.

ddc_channel stands for hw dp aux index which is from vbios,
but link_index is pure software concept for link count depending on which link
is probed first. They are not interchangeable.

A dmub aux transaction can pass if the eDP link_index happens to have
the same value as the vbios ddc_channel, e.g., ddc_channel = 1, link_index = 1.
If they differ, e.g., ddc_channel = 2, link_index = 0, overwriting
ddc_channel with link_index causes the wrong ddc channel to be used for the aux
transaction in dmub PSR, leading to an aux transaction soft hang.

[How]
The ddc_channel mapping for each link is determined by vbios and further
parsed in dc. This information should not be modified in any way; otherwise
the mapping is broken, leading to aux transaction timeouts.

Reviewed-by: Aurabindo Jayamohanan Pillai 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Fangzhi Zuo 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index d0206d98217b..5d150f85e1d5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8639,7 +8639,7 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter 
*i2c_adap,
 
if (dc_submit_i2c(
ddc_service->ctx->dc,
-   ddc_service->ddc_pin->hw_info.ddc_channel,
+   ddc_service->link->link_index,
))
result = num;
 
@@ -8675,8 +8675,6 @@ create_i2c(struct ddc_service *ddc_service,
snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus 
%d", link_index);
i2c_set_adapdata(>base, i2c);
i2c->ddc_service = ddc_service;
-   if (i2c->ddc_service->ddc_pin)
-   i2c->ddc_service->ddc_pin->hw_info.ddc_channel = link_index;
 
return i2c;
 }
-- 
2.25.1



[PATCH 31/40] drm/amd/display: Add flag to modify MST delay

2022-06-30 Thread Rodrigo Siqueira
From: Duncan Ma 

[Why]
Some panels may require more MST delay on discovery

[How]
Add panel patch and debug mst delay flag

Reviewed-by: Charlene Liu 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Duncan Ma 
---
 drivers/gpu/drm/amd/display/dc/dc.h   | 3 ++-
 drivers/gpu/drm/amd/display/dc/dc_types.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 337bbd4c6642..b9a23e6b3696 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -741,14 +741,15 @@ struct dc_debug_options {
bool enable_sw_cntl_psr;
union dpia_debug_options dpia_debug;
bool disable_fixed_vs_aux_timeout_wa;
-   uint32_t fixed_vs_aux_delay_config_wa;
bool force_disable_subvp;
bool force_subvp_mclk_switch;
bool force_usr_allow;
/* uses value at boot and disables switch */
bool disable_dtb_ref_clk_switch;
+   uint32_t fixed_vs_aux_delay_config_wa;
bool extended_blank_optimization;
union aux_wake_wa_options aux_wake_wa;
+   uint32_t mst_start_top_delay;
uint8_t psr_power_use_phy_fsm;
enum dml_hostvm_override_opts dml_hostvm_override;
bool use_legacy_soc_bb_mechanism;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h 
b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 413738fe9d59..084074a23031 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -199,6 +199,7 @@ struct dc_panel_patch {
unsigned int embedded_tiled_slave;
unsigned int disable_fams;
unsigned int skip_avmute;
+   unsigned int mst_start_top_delay;
 };
 
 struct dc_edid_caps {
-- 
2.25.1



[PATCH 27/40] drm/amd/display: Indicate stream change on ODM change

2022-06-30 Thread Rodrigo Siqueira
From: Chris Park 

[Why]
With ODM policy 2 to 1, there exists a new use case
scenario where stream content is unchanged, but ODM
may be used.  When this happens, the stream needs
to be committed with a new pipe setting.
This did not happen due to stream change
detection logic not accounting for ODM.

[How]
Set ODM flag in stream and commit stream when change
in ODM has been detected due to policy change.

Reviewed-by: Samson Tam 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Chris Park 
---
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 3 +++
 drivers/gpu/drm/amd/display/dc/dc_stream.h| 2 ++
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index b67fdb31f75f..3d45f6cae1f7 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -1702,6 +1702,9 @@ bool dc_is_stream_unchanged(
if (memcmp(_stream->audio_info, >audio_info, 
sizeof(stream->audio_info)) != 0)
return false;
 
+   if (old_stream->odm_2to1_policy_applied != 
stream->odm_2to1_policy_applied)
+   return false;
+
return true;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h 
b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index 2a2f719587ee..c3d97206ed89 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -276,6 +276,8 @@ struct dc_stream_state {
bool has_non_synchronizable_pclk;
bool vblank_synchronized;
struct mall_stream_config mall_stream_config;
+
+   bool odm_2to1_policy_applied;
 };
 
 #define ABM_LEVEL_IMMEDIATE_DISABLE 255
-- 
2.25.1



[PATCH 34/40] drm/amd/display: Fix uninitialized variable.

2022-06-30 Thread Rodrigo Siqueira
From: Jimmy Kizito 

[Why]
Uninitialized variable causes diag compilation build failure.

[How]
- Ensure that variable in question is always initialized before being
used.
- The variable in question is the USB4 DP training pattern. In case an
unsupported training pattern has been requested, update status
accordingly and abort current link training attempt.

Reviewed-by: Meenakshikumar Somasundaram 
Acked-by: Alan Liu 
Signed-off-by: Jimmy Kizito 
---
 .../drm/amd/display/dc/core/dc_link_dpia.c| 49 +++
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
index c8610a5dedfa..3b8acda50701 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
@@ -202,30 +202,34 @@ static uint8_t dpia_build_set_config_data(enum 
dpia_set_config_type type,
 }
 
 /* Convert DC training pattern to DPIA training stage. */
-static enum dpia_set_config_ts convert_trng_ptn_to_trng_stg(enum 
dc_dp_training_pattern tps)
+static enum dc_status convert_trng_ptn_to_trng_stg(enum dc_dp_training_pattern 
tps, enum dpia_set_config_ts *ts)
 {
-   enum dpia_set_config_ts ts;
+   enum dc_status status = DC_OK;
 
switch (tps) {
case DP_TRAINING_PATTERN_SEQUENCE_1:
-   ts = DPIA_TS_TPS1;
+   *ts = DPIA_TS_TPS1;
break;
case DP_TRAINING_PATTERN_SEQUENCE_2:
-   ts = DPIA_TS_TPS2;
+   *ts = DPIA_TS_TPS2;
break;
case DP_TRAINING_PATTERN_SEQUENCE_3:
-   ts = DPIA_TS_TPS3;
+   *ts = DPIA_TS_TPS3;
break;
case DP_TRAINING_PATTERN_SEQUENCE_4:
-   ts = DPIA_TS_TPS4;
+   *ts = DPIA_TS_TPS4;
break;
-   default:
-   ts = DPIA_TS_DPRX_DONE;
-   ASSERT(false); /* TPS not supported by helper function. */
+   case DP_TRAINING_PATTERN_VIDEOIDLE:
+   *ts = DPIA_TS_DPRX_DONE;
+   break;
+   default: /* TPS not supported by helper function. */
+   ASSERT(false);
+   *ts = DPIA_TS_DPRX_DONE;
+   status = DC_UNSUPPORTED_VALUE;
break;
}
 
-   return ts;
+   return status;
 }
 
 /* Write training pattern to DPCD. */
@@ -336,10 +340,7 @@ static enum link_training_result 
dpia_training_cr_non_transparent(
/* DPOA-to-x */
/* Instruct DPOA to transmit TPS1 then update DPCD. */
if (retry_count == 0) {
-   ts = 
convert_trng_ptn_to_trng_stg(lt_settings->pattern_for_cr);
-   status = core_link_send_set_config(link,
-   DPIA_SET_CFG_SET_TRAINING,
-   ts);
+   status = 
convert_trng_ptn_to_trng_stg(lt_settings->pattern_for_cr, );
if (status != DC_OK) {
result = LINK_TRAINING_ABORT;
break;
@@ -421,13 +422,14 @@ static enum link_training_result 
dpia_training_cr_non_transparent(
if (link->is_hpd_pending)
result = LINK_TRAINING_ABORT;
 
-   DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) clock recovery\n"
-   " -hop(%d)\n - result(%d)\n - retries(%d)\n",
+   DC_LOG_HW_LINK_TRAINING(
+   "%s\n DPIA(%d) clock recovery\n -hop(%d)\n - result(%d)\n - 
retries(%d)\n - status(%d)\n",
__func__,
link->link_id.enum_id - ENUM_ID_1,
hop,
result,
-   retry_count);
+   retry_count,
+   status);
 
return result;
 }
@@ -631,7 +633,11 @@ static enum link_training_result 
dpia_training_eq_non_transparent(
 
/* Instruct DPOA to transmit TPSn then update DPCD. */
if (retries_eq == 0) {
-   ts = convert_trng_ptn_to_trng_stg(tr_pattern);
+   status = convert_trng_ptn_to_trng_stg(tr_pattern, );
+   if (status != DC_OK) {
+   result = LINK_TRAINING_ABORT;
+   break;
+   }
status = core_link_send_set_config(link,
DPIA_SET_CFG_SET_TRAINING,
ts);
@@ -712,13 +718,14 @@ static enum link_training_result 
dpia_training_eq_non_transparent(
if (link->is_hpd_pending)
result = LINK_TRAINING_ABORT;
 
-   DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) equalization\n"
-   " - hop(%d)\n - result(%d)\n - retries(%d)\n",
+   DC_LOG_HW_LINK_TRAINING(
+   "%s\n DPIA(%d) equalization\n - hop(%d)\n - result(%d)\n - 
retries(%d)\n - status(%d)\n",

[PATCH 20/40] drm/amd/display: add an option to skip wait for HPD when powering on eDP panel

2022-06-30 Thread Rodrigo Siqueira
From: Evgenii Krasnikov 

[HOW/WHY]
Add an option to skip edp_wait_for_hpd_ready when necessary

Reviewed-by: Jayendran Ramani 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Evgenii Krasnikov 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 5 +++--
 drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h  | 6 +++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 1982ec0b55d4..e0660e7356c9 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -6971,13 +6971,14 @@ bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx)
dc_is_dp_signal(pipe_ctx->stream->signal));
 }
 
-void edp_panel_backlight_power_on(struct dc_link *link)
+void edp_panel_backlight_power_on(struct dc_link *link, bool wait_for_hpd)
 {
if (link->connector_signal != SIGNAL_TYPE_EDP)
return;
 
link->dc->hwss.edp_power_control(link, true);
-   link->dc->hwss.edp_wait_for_hpd_ready(link, true);
+   if (wait_for_hpd)
+   link->dc->hwss.edp_wait_for_hpd_ready(link, true);
if (link->dc->hwss.edp_backlight_control)
link->dc->hwss.edp_backlight_control(link, true);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h 
b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
index 44f167d2584f..aaa17d4dd46b 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
@@ -193,6 +193,7 @@ enum dc_status dpcd_configure_lttpr_mode(
struct link_training_settings *lt_settings);
 
 enum dp_link_encoding dp_get_link_encoding_format(const struct 
dc_link_settings *link_settings);
+bool dp_retrieve_lttpr_cap(struct dc_link *link);
 bool dpcd_write_128b_132b_sst_payload_allocation_table(
const struct dc_stream_state *stream,
struct dc_link *link,
@@ -214,11 +215,10 @@ void enable_dp_hpo_output(struct dc_link *link,
 void disable_dp_hpo_output(struct dc_link *link,
const struct link_resource *link_res,
enum signal_type signal);
+
 void setup_dp_hpo_stream(struct pipe_ctx *pipe_ctx, bool enable);
 bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx);
-
-bool dp_retrieve_lttpr_cap(struct dc_link *link);
-void edp_panel_backlight_power_on(struct dc_link *link);
+void edp_panel_backlight_power_on(struct dc_link *link, bool wait_for_hpd);
 void dp_receiver_power_ctrl(struct dc_link *link, bool on);
 void dp_source_sequence_trace(struct dc_link *link, uint8_t dp_test_mode);
 void dp_enable_link_phy(
-- 
2.25.1



[PATCH 29/40] drm/amd/display: Fix refresh rate issue on Club 3D

2022-06-30 Thread Rodrigo Siqueira
Recently we introduced a patch for fixing an MST issue, but it caused a
regression on Club 3D since we could not set a refresh rate higher than
60Hz. This commit fixes this issue by adding a proper check after
validating the stream.

Fixes: 1bd038dc60e3 ("drm/amd/display: add mst port output bw check")
Reviewed-by: Hersen Wu 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 5d150f85e1d5..7642411ec6db 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -7210,12 +7210,10 @@ create_validate_stream_for_sink(struct 
amdgpu_dm_connector *aconnector,
break;
}
 
-   if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
+   dc_result = dc_validate_stream(adev->dm.dc, stream);
+   if (dc_result == DC_OK && stream->signal == 
SIGNAL_TYPE_DISPLAY_PORT_MST)
dc_result = dm_dp_mst_is_port_support_mode(aconnector, 
stream);
 
-   if (dc_result == DC_OK)
-   dc_result = dc_validate_stream(adev->dm.dc, stream);
-
if (dc_result != DC_OK) {
DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation 
with error %d (%s)\n",
  drm_mode->hdisplay,
-- 
2.25.1



[PATCH 26/40] drm/amd/display: disable otg toggle w/a on boot

2022-06-30 Thread Rodrigo Siqueira
From: Dmytro Laktyushkin 

This w/a has a bad interaction with seamless boot toggling an
active stream. Most panels recover, however some fail leading
to display corruption.

Reviewed-by: Charlene Liu 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Dmytro Laktyushkin 
---
 .../gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
index f4381725b210..36b0cd47c1c7 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
@@ -173,11 +173,14 @@ static void dcn315_update_clocks(struct clk_mgr 
*clk_mgr_base,
}
 
if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, 
clk_mgr_base->clks.dispclk_khz)) {
-   dcn315_disable_otg_wa(clk_mgr_base, true);
+   /* No need to apply the w/a if we haven't taken over from bios 
yet */
+   if (clk_mgr_base->clks.dispclk_khz)
+   dcn315_disable_otg_wa(clk_mgr_base, true);
 
clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
dcn315_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
-   dcn315_disable_otg_wa(clk_mgr_base, false);
+   if (clk_mgr_base->clks.dispclk_khz)
+   dcn315_disable_otg_wa(clk_mgr_base, false);
 
update_dispclk = true;
}
-- 
2.25.1



[PATCH 22/40] drm/amd/display: Guard against ddc_pin being NULL for AUX

2022-06-30 Thread Rodrigo Siqueira
From: Nicholas Kazlauskas 

[Why]
In the case where we don't support DMUB aux but we have DPIA links
in the configuration we might try to message AUX using the legacy
path - where DDC pin is NULL. This causes a NULL pointer dereference.

[How]
Guard against NULL DDC pin, return a failure for aux engine acquire.

Reviewed-by: Michael Strauss 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Nicholas Kazlauskas 
---
 drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c 
b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
index 9e39cd7b203e..49d3145ae8fb 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
@@ -572,6 +572,11 @@ int dce_aux_transfer_raw(struct ddc_service *ddc,
 
memset(_req, 0, sizeof(aux_req));
 
+   if (ddc_pin == NULL) {
+   *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
+   return -1;
+   }
+
aux_engine = ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en];
if (!acquire(aux_engine, ddc_pin)) {
*operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
-- 
2.25.1



[PATCH 33/40] drm/amd/display: Move all linux includes into OS types

2022-06-30 Thread Rodrigo Siqueira
From: Harry Wentland 

Move all linux includes into OS types.

Acked-by: Alan Liu 
Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/amd/display/dc/basics/vector.c   | 2 --
 drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c   | 2 --
 drivers/gpu/drm/amd/display/dc/core/dc.c | 3 ---
 drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c| 2 --
 drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c| 2 --
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c| 2 --
 drivers/gpu/drm/amd/display/dc/core/dc_sink.c| 2 --
 drivers/gpu/drm/amd/display/dc/core/dc_stream.c  | 3 ---
 drivers/gpu/drm/amd/display/dc/core/dc_surface.c | 2 --
 drivers/gpu/drm/amd/display/dc/dce/dce_audio.c   | 2 --
 drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 3 ---
 drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c| 2 --
 drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c| 3 ---
 drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c  | 2 --
 drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c | 2 --
 drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c| 3 ---
 drivers/gpu/drm/amd/display/dc/dce/dce_opp.c | 2 --
 drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c  | 2 --
 drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c  | 2 --
 drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c| 3 ---
 drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c  | 2 --
 drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_regamma_v.c | 2 --
 drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c  | 2 --
 drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c   | 2 --
 drivers/gpu/drm/amd/display/dc/dce112/dce112_compressor.c| 3 ---
 drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c  | 2 --
 drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c  | 2 --
 drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c| 2 --
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c  | 2 --
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.c | 2 --
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c| 3 ---
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c | 2 --
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c| 2 --
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c  | 2 --
 drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c  | 2 --
 drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c   | 2 --
 drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c | 3 ---
 drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c | 2 --
 drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c | 2 --
 .../gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c   | 2 --
 .../gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c   | 2 --
 drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c | 2 --
 drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c | 2 --
 drivers/gpu/drm/amd/display/dc/irq/irq_service.c | 2 --
 drivers/gpu/drm/amd/display/dc/os_types.h| 5 -
 .../gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c| 2 --
 .../gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c  | 2 --
 drivers/gpu/drm/amd/display/modules/color/color_gamma.c  | 3 ---
 drivers/gpu/drm/amd/display/modules/freesync/freesync.c  | 2 --
 49 files changed, 4 insertions(+), 107 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/basics/vector.c 
b/drivers/gpu/drm/amd/display/dc/basics/vector.c
index 706c803c4d3b..84aeccf36b4b 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/vector.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/vector.c
@@ -23,8 +23,6 @@
  *
  */
 
-#include 
-
 #include "dm_services.h"
 #include "include/vector.h"
 
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c 
b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 25791ed0559d..c332650b7048 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -23,8 +23,6 @@
  *
  */
 
-#include 
-
 #include "dm_services.h"
 
 #include "ObjectID.h"
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 8ed208e5def2..89a2f6749239 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -22,9 +22,6 @@
  * Authors: AMD
  */
 
-#include 
-#include 
-
 #include "dm_services.h"
 
 #include "dc.h"
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index 9039fb134db5..2a8007928210 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -23,8 +23,6 @@
  *
  */
 
-#include 
-
 #include "dm_services.h"
 #include 

[PATCH 18/40] drm/amd/display: Extend soc BB capability

2022-06-30 Thread Rodrigo Siqueira
From: Jun Lei 

[why]
Some parts are consuming dangerously close to maximum number of states
supported when updating the BB (i.e. 8).

[how]
Change maximum stages from 9 to 20.

Acked-by: Rodrigo Siqueira 
Signed-off-by: Jun Lei 
---
 .../drm/amd/display/dc/dcn32/dcn32_resource.c | 508 ++
 .../amd/display/dc/dcn321/dcn321_resource.c   | 503 +
 2 files changed, 784 insertions(+), 227 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 1f2af676191b..e9ecc27a51de 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -3410,6 +3410,277 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct 
dc_state *context, display
}
 }
 
+static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry)
+{
+   if (entry->dcfclk_mhz > 0) {
+   float bw_on_sdp = entry->dcfclk_mhz * 
dcn3_2_soc.return_bus_width_bytes * 
((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100);
+
+   entry->fabricclk_mhz = bw_on_sdp / 
(dcn3_2_soc.return_bus_width_bytes * 
((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100));
+   entry->dram_speed_mts = bw_on_sdp / (dcn3_2_soc.num_chans *
+   dcn3_2_soc.dram_channel_width_bytes * 
((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100));
+   } else if (entry->fabricclk_mhz > 0) {
+   float bw_on_fabric = entry->fabricclk_mhz * 
dcn3_2_soc.return_bus_width_bytes * 
((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100);
+
+   entry->dcfclk_mhz = bw_on_fabric / 
(dcn3_2_soc.return_bus_width_bytes * 
((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100));
+   entry->dram_speed_mts = bw_on_fabric / (dcn3_2_soc.num_chans *
+   dcn3_2_soc.dram_channel_width_bytes * 
((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100));
+   } else if (entry->dram_speed_mts > 0) {
+   float bw_on_dram = entry->dram_speed_mts * dcn3_2_soc.num_chans 
*
+   dcn3_2_soc.dram_channel_width_bytes * 
((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
+
+   entry->fabricclk_mhz = bw_on_dram / 
(dcn3_2_soc.return_bus_width_bytes * 
((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100));
+   entry->dcfclk_mhz = bw_on_dram / 
(dcn3_2_soc.return_bus_width_bytes * 
((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100));
+   }
+}
+
+static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st 
*entry)
+{
+   float memory_bw_kbytes_sec = entry->dram_speed_mts * 
dcn3_2_soc.num_chans *
+   dcn3_2_soc.dram_channel_width_bytes * 
((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
+
+   float fabric_bw_kbytes_sec = entry->fabricclk_mhz * 
dcn3_2_soc.return_bus_width_bytes * 
((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100);
+
+   float sdp_bw_kbytes_sec = entry->dcfclk_mhz * 
dcn3_2_soc.return_bus_width_bytes * 
((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100);
+
+   float limiting_bw_kbytes_sec = memory_bw_kbytes_sec;
+
+   if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec)
+   limiting_bw_kbytes_sec = fabric_bw_kbytes_sec;
+
+   if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec)
+   limiting_bw_kbytes_sec = sdp_bw_kbytes_sec;
+
+   return limiting_bw_kbytes_sec;
+}
+
+static void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st 
*table, unsigned int *num_entries,
+   struct _vcs_dpi_voltage_scaling_st *entry)
+{
+   int index = 0;
+   int i = 0;
+   float net_bw_of_new_state = 0;
+
+   if (*num_entries == 0) {
+   table[0] = *entry;
+   (*num_entries)++;
+   } else {
+   net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry);
+   while (net_bw_of_new_state > 
calculate_net_bw_in_kbytes_sec([index])) {
+   index++;
+   if (index >= *num_entries)
+   break;
+   }
+
+   for (i = *num_entries; i > index; i--) {
+   table[i] = table[i - 1];
+   }
+
+   table[index] = *entry;
+   (*num_entries)++;
+   }
+}
+
+static void remove_entry_from_table_at_index(struct 
_vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries,
+   unsigned int index)
+{
+   int i;
+
+   if (*num_entries == 0)
+   return;
+
+   for (i = index; i < *num_entries - 1; i++) {
+   table[i] = table[i + 1];
+   }
+   memset([--(*num_entries)], 0, sizeof(struct 
_vcs_dpi_voltage_scaling_st));
+}
+
+static int build_synthetic_soc_states(struct 

[PATCH 15/40] drm/amd/display: disable timing sync b/w odm halves

2022-06-30 Thread Rodrigo Siqueira
From: Dmytro Laktyushkin 

Fix for a bug where we would try to timing sync 2 odm halves.

Acked-by: Rodrigo Siqueira 
Signed-off-by: Dmytro Laktyushkin 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 40848eda44d9..795766cb27dd 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1333,7 +1333,9 @@ static void program_timing_sync(
struct pipe_ctx *unsynced_pipes[MAX_PIPES] = { NULL };
 
for (i = 0; i < pipe_count; i++) {
-   if (!ctx->res_ctx.pipe_ctx[i].stream || 
ctx->res_ctx.pipe_ctx[i].top_pipe)
+   if (!ctx->res_ctx.pipe_ctx[i].stream
+   || ctx->res_ctx.pipe_ctx[i].top_pipe
+   || ctx->res_ctx.pipe_ctx[i].prev_odm_pipe)
continue;
 
unsynced_pipes[i] = >res_ctx.pipe_ctx[i];
-- 
2.25.1



[PATCH 16/40] drm/amd/display: guard for virtual calling destroy_link_encoders

2022-06-30 Thread Rodrigo Siqueira
From: Martin Leung 

[Why]:
On power down, virtual dal may try to delete link_encoders by
referencing uninitialized res_pool.

[How]:
Added guard against empty res_pool.

Acked-by: Rodrigo Siqueira 
Signed-off-by: Martin Leung 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 795766cb27dd..34a3e1eeb5c4 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -347,10 +347,16 @@ static bool create_link_encoders(struct dc *dc)
  */
 static void destroy_link_encoders(struct dc *dc)
 {
-   unsigned int num_usb4_dpia = dc->res_pool->res_cap->num_usb4_dpia;
-   unsigned int num_dig_link_enc = dc->res_pool->res_cap->num_dig_link_enc;
+   unsigned int num_usb4_dpia;
+   unsigned int num_dig_link_enc;
int i;
 
+   if (!dc->res_pool)
+   return;
+
+   num_usb4_dpia = dc->res_pool->res_cap->num_usb4_dpia;
+   num_dig_link_enc = dc->res_pool->res_cap->num_dig_link_enc;
+
/* A platform without USB4 DPIA endpoints has a fixed mapping between 
DIG
 * link encoders and physical display endpoints and does not require
 * additional link encoder objects.
-- 
2.25.1



[PATCH 23/40] drm/amd/display: Remove incorrect ASSERT check for link_enc

2022-06-30 Thread Rodrigo Siqueira
From: Nicholas Kazlauskas 

[Why]
It's possible that we don't have a link encoder assignment if the
context is NULL but we're calling dc_add_stream_to_ctx from DM directly.

Link encoder assignment will happen later after global validation
runs with fast_validate = false.

[How]
Remove the ASSERTION. We already guard against NULL link_enc.

Reviewed-by: Michael Strauss 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Nicholas Kazlauskas 
---
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 0b1ef76e3268..a0a94ea58a57 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -1246,10 +1246,9 @@ static void get_pixel_clock_parameters(
pixel_clk_params->requested_pix_clk_100hz = 
stream->timing.pix_clk_100hz;
 
link_enc = link_enc_cfg_get_link_enc(link);
-   ASSERT(link_enc);
-
if (link_enc)
pixel_clk_params->encoder_object_id = link_enc->id;
+
pixel_clk_params->signal_type = pipe_ctx->stream->signal;
pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1;
/* TODO: un-hardcode*/
-- 
2.25.1



[PATCH 14/40] drm/amd/display: Add minimal pipe split transition state

2022-06-30 Thread Rodrigo Siqueira
[WHY?]
When adding/removing a plane to some configurations, unsupported pipe
programming can occur when moving to a new plane.  Such cases include pipe
split on multi-display, with MPO, and/or ODM.

[HOW?]
Add a safe transition state that minimizes pipe usage before programming
the new configuration. When adding a plane, the current state has the fewest
pipes required so it is applied without splitting.  This must be applied
prior to updating the plane_state for seamless transition.  When removing a
plane, the new state has the fewest pipes required so it is applied without
splitting.

Signed-off-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c   | 277 +
 drivers/gpu/drm/amd/display/dc/dc_stream.h |  18 ++
 2 files changed, 295 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 05c2e178ca99..40848eda44d9 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -2717,6 +2717,137 @@ static void copy_stream_update_to_stream(struct dc *dc,
}
 }
 
+void dc_reset_state(struct dc *dc, struct dc_state *context)
+{
+   dc_resource_state_destruct(context);
+
+   /* clear the structure, but don't reset the reference count */
+   memset(context, 0, offsetof(struct dc_state, refcount));
+
+   init_state(dc, context);
+}
+
+static bool update_planes_and_stream_state(struct dc *dc,
+   struct dc_surface_update *srf_updates, int surface_count,
+   struct dc_stream_state *stream,
+   struct dc_stream_update *stream_update,
+   enum surface_update_type *new_update_type,
+   struct dc_state **new_context)
+{
+   struct dc_state *context;
+   int i, j;
+   enum surface_update_type update_type;
+   const struct dc_stream_status *stream_status;
+   struct dc_context *dc_ctx = dc->ctx;
+
+   stream_status = dc_stream_get_status(stream);
+
+   if (!stream_status) {
+   if (surface_count) /* Only an error condition if surf_count 
non-zero*/
+   ASSERT(false);
+
+   return false; /* Cannot commit surface to stream that is not 
committed */
+   }
+
+   context = dc->current_state;
+
+   update_type = dc_check_update_surfaces_for_stream(
+   dc, srf_updates, surface_count, stream_update, 
stream_status);
+
+   /* update current stream with the new updates */
+   copy_stream_update_to_stream(dc, context, stream, stream_update);
+
+   /* do not perform surface update if surface has invalid dimensions
+* (all zero) and no scaling_info is provided
+*/
+   if (surface_count > 0) {
+   for (i = 0; i < surface_count; i++) {
+   if ((srf_updates[i].surface->src_rect.width == 0 ||
+srf_updates[i].surface->src_rect.height == 0 ||
+srf_updates[i].surface->dst_rect.width == 0 ||
+srf_updates[i].surface->dst_rect.height == 0) 
&&
+   (!srf_updates[i].scaling_info ||
+ srf_updates[i].scaling_info->src_rect.width 
== 0 ||
+ srf_updates[i].scaling_info->src_rect.height 
== 0 ||
+ srf_updates[i].scaling_info->dst_rect.width 
== 0 ||
+ srf_updates[i].scaling_info->dst_rect.height 
== 0)) {
+   DC_ERROR("Invalid src/dst rects in surface 
update!\n");
+   return false;
+   }
+   }
+   }
+
+   if (update_type >= update_surface_trace_level)
+   update_surface_trace(dc, srf_updates, surface_count);
+
+   if (update_type >= UPDATE_TYPE_FULL) {
+   struct dc_plane_state *new_planes[MAX_SURFACES] = {0};
+
+   for (i = 0; i < surface_count; i++)
+   new_planes[i] = srf_updates[i].surface;
+
+   /* initialize scratch memory for building context */
+   context = dc_create_state(dc);
+   if (context == NULL) {
+   DC_ERROR("Failed to allocate new validate context!\n");
+   return false;
+   }
+
+   dc_resource_state_copy_construct(
+   dc->current_state, context);
+
+   /*remove old surfaces from context */
+   if (!dc_rem_all_planes_for_stream(dc, stream, context)) {
+
+   BREAK_TO_DEBUGGER();
+   goto fail;
+   }
+
+   /* add surface to context */
+   if (!dc_add_all_planes_for_stream(dc, stream, new_planes, 
surface_count, context)) {
+
+   BREAK_TO_DEBUGGER();
+   goto fail;
+   }
+   }

[PATCH 17/40] drm/amd/display: Maintain consistent mode of operation during encoder assignment

2022-06-30 Thread Rodrigo Siqueira
From: Jimmy Kizito 

[Why]
While applying a state to hardware, there is a transition period where
the back-end is reset using the old state; then enabled using the new
state.

Generally, the link encoder configuration module queries
stream-to-encoder assignments in either the new or old state based on a
mode variable. During the transition there is a need to query both
states, however toggling this mode variable can lead to incorrect
programming of encoders.

[How]
- Add new function to explicitly query stream-to-encoder assignment
in the current state rather than intermittently switch the mode
of operation of the link encoder assignment module.
- Add additional checks for encoder assignment defects.
- Explicitly reset the mode of operation if application of state
to hardware ends prematurely.

Acked-by: Rodrigo Siqueira 
Signed-off-by: Jimmy Kizito 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c  |  5 ++-
 .../drm/amd/display/dc/core/dc_link_enc_cfg.c | 32 +++
 .../gpu/drm/amd/display/dc/core/dc_resource.c |  7 ++--
 .../gpu/drm/amd/display/dc/inc/link_enc_cfg.h |  5 +++
 4 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 34a3e1eeb5c4..8ed208e5def2 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1750,8 +1750,11 @@ static enum dc_status dc_commit_state_no_check(struct dc 
*dc, struct dc_state *c
 
result = dc->hwss.apply_ctx_to_hw(dc, context);
 
-   if (result != DC_OK)
+   if (result != DC_OK) {
+   /* Application of dc_state to hardware stopped. */
+   dc->current_state->res_ctx.link_enc_cfg_ctx.mode = 
LINK_ENC_CFG_STEADY;
return result;
+   }
 
dc_trigger_sync(dc, context);
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
index 639a0a276a08..614f022d1cff 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
@@ -292,6 +292,7 @@ void link_enc_cfg_link_encs_assign(
int j;
 
ASSERT(state->stream_count == stream_count);
+   ASSERT(dc->current_state->res_ctx.link_enc_cfg_ctx.mode == 
LINK_ENC_CFG_STEADY);
 
/* Release DIG link encoder resources before running assignment 
algorithm. */
for (i = 0; i < dc->current_state->stream_count; i++)
@@ -561,6 +562,31 @@ struct link_encoder *link_enc_cfg_get_link_enc(
return link_enc;
 }
 
+struct link_encoder *link_enc_cfg_get_link_enc_used_by_stream_current(
+   struct dc *dc,
+   const struct dc_stream_state *stream)
+{
+   struct link_encoder *link_enc = NULL;
+   struct display_endpoint_id ep_id;
+   int i;
+
+   ep_id = (struct display_endpoint_id) {
+   .link_id = stream->link->link_id,
+   .ep_type = stream->link->ep_type};
+
+   for (i = 0; i < MAX_PIPES; i++) {
+   struct link_enc_assignment assignment =
+   
dc->current_state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i];
+
+   if (assignment.valid == true && 
are_ep_ids_equal(&assignment.ep_id, &ep_id)) {
+   link_enc = 
stream->link->dc->res_pool->link_encoders[assignment.eng_id - ENGINE_ID_DIGA];
+   break;
+   }
+   }
+
+   return link_enc;
+}
+
 bool link_enc_cfg_is_link_enc_avail(struct dc *dc, enum engine_id eng_id, 
struct dc_link *link)
 {
bool is_avail = true;
@@ -595,6 +621,7 @@ bool link_enc_cfg_validate(struct dc *dc, struct dc_state 
*state)
uint8_t dig_stream_count = 0;
int matching_stream_ptrs = 0;
int eng_ids_per_ep_id[MAX_PIPES] = {0};
+   int ep_ids_per_eng_id[MAX_PIPES] = {0};
int valid_bitmap = 0;
 
/* (1) No. valid entries same as stream count. */
@@ -630,6 +657,7 @@ bool link_enc_cfg_validate(struct dc *dc, struct dc_state 
*state)
struct display_endpoint_id ep_id_i = assignment_i.ep_id;
 
eng_ids_per_ep_id[i]++;
+   ep_ids_per_eng_id[i]++;
for (j = 0; j < MAX_PIPES; j++) {
struct link_enc_assignment assignment_j =

state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[j];
@@ -644,6 +672,10 @@ bool link_enc_cfg_validate(struct dc *dc, struct dc_state 
*state)
assignment_i.eng_id != 
assignment_j.eng_id) {
valid_uniqueness = false;
eng_ids_per_ep_id[i]++;
+   } else if (!are_ep_ids_equal(&ep_id_i, 
&ep_id_j) &&
+   assignment_i.eng_id == 

[PATCH 11/40] drm/amd/display: Add basic infrastructure for enabling FAMS

2022-06-30 Thread Rodrigo Siqueira
We want to enable Firmware Assisted Memory Switching (FAMS), but first,
we need to add the required code infrastructure in DC before allowing it
in amdgpu_dm.

Signed-off-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c  |   8 +
 drivers/gpu/drm/amd/display/dc/dc.h   |  10 +-
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  |  87 +
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h  |   4 +
 drivers/gpu/drm/amd/display/dc/dc_types.h |   2 +
 .../drm/amd/display/dc/dcn30/dcn30_hwseq.c|  22 ++-
 .../drm/amd/display/dc/dcn30/dcn30_resource.c | 170 ++
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  |   6 +
 .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.c  |   2 +
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   |  25 ++-
 10 files changed, 328 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index fcb503b6a1a2..db02f071c949 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3842,10 +3842,18 @@ bool dc_is_plane_eligible_for_idle_optimizations(struct 
dc *dc, struct dc_plane_
 /* cleanup on driver unload */
 void dc_hardware_release(struct dc *dc)
 {
+   dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(dc);
+
if (dc->hwss.hardware_release)
dc->hwss.hardware_release(dc);
 }
 
+void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc)
+{
+   if (dc->current_state)
+   
dc->current_state->bw_ctx.bw.dcn.clk.fw_based_mclk_switching_shut_down = true;
+}
+
 /*
  *
  * Function: dc_is_dmub_outbox_supported -
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 629cd76b97c2..6674edf69b87 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -163,7 +163,8 @@ struct dc_color_caps {
 };
 
 struct dc_dmub_caps {
-bool psr;
+   bool psr;
+   bool mclk_sw;
 };
 
 struct dc_caps {
@@ -359,6 +360,8 @@ enum visual_confirm {
VISUAL_CONFIRM_HDR = 2,
VISUAL_CONFIRM_MPCTREE = 4,
VISUAL_CONFIRM_PSR = 5,
+   VISUAL_CONFIRM_SWAPCHAIN = 6,
+   VISUAL_CONFIRM_FAMS = 7,
VISUAL_CONFIRM_SWIZZLE = 9,
 };
 
@@ -441,6 +444,7 @@ struct dc_clocks {
bool fclk_prev_p_state_change_support;
int num_ways;
bool fw_based_mclk_switching;
+   bool fw_based_mclk_switching_shut_down;
int prev_num_ways;
enum dtm_pstate dtm_level;
int max_supported_dppclk_khz;
@@ -726,6 +730,7 @@ struct dc_debug_options {
 
/* Enable dmub aux for legacy ddc */
bool enable_dmub_aux_for_legacy_ddc;
+   bool disable_fams;
bool optimize_edp_link_rate; /* eDP ILR */
/* FEC/PSR1 sequence enable delay in 100us */
uint8_t fec_enable_delay_in100us;
@@ -1452,6 +1457,9 @@ void dc_enable_dcmode_clk_limit(struct dc *dc, bool 
enable);
 /* cleanup on driver unload */
 void dc_hardware_release(struct dc *dc);
 
+/* disables fw based mclk switch */
+void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc);
+
 bool dc_set_psr_allow_active(struct dc *dc, bool enable);
 void dc_z10_restore(const struct dc *dc);
 void dc_z10_save_init(struct dc *dc);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c 
b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index ae63159e5d86..6a25d64dd15c 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -252,6 +252,93 @@ void dc_dmub_trace_event_control(struct dc *dc, bool 
enable)
dm_helpers_dmub_outbox_interrupt_control(dc->ctx, enable);
 }
 
+void dc_dmub_srv_drr_update_cmd(struct dc *dc, uint32_t tg_inst, uint32_t 
vtotal_min, uint32_t vtotal_max)
+{
+   union dmub_rb_cmd cmd = { 0 };
+
+   cmd.drr_update.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+   cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_DRR_UPDATE;
+   cmd.drr_update.dmub_optc_state_req.v_total_max = vtotal_max;
+   cmd.drr_update.dmub_optc_state_req.v_total_min = vtotal_min;
+   cmd.drr_update.dmub_optc_state_req.tg_inst = tg_inst;
+
+   cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - 
sizeof(cmd.drr_update.header);
+
+   // Send the command to the DMCUB.
+   dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
+   dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
+   dc_dmub_srv_wait_idle(dc->ctx->dmub_srv);
+}
+
+uint8_t dc_dmub_srv_get_pipes_for_stream(struct dc *dc, struct dc_stream_state 
*stream)
+{
+   uint8_t pipes = 0;
+   int i = 0;
+
+   for (i = 0; i < MAX_PIPES; i++) {
+   struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+   if (pipe->stream == stream && pipe->stream_res.tg)
+   pipes = i;
+   }
+   return pipes;
+}
+
+int 

[PATCH 09/40] drm/amd/display: Update gpuvm_max_page_table_levels IP param

2022-06-30 Thread Rodrigo Siqueira
From: Eric Bernstein 

After some experimental tests, we noticed that we need to set
gpuvm_max_page_table_levels to '4' to meet the hardware requirements.

Signed-off-by: Eric Bernstein 
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c   | 2 +-
 drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 63227c55a2f0..1f2af676191b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -121,7 +121,7 @@ static const struct IP_BASE DCN_BASE = { { { { 0x0012, 
0x00C0, 0x34C
 
 struct _vcs_dpi_ip_params_st dcn3_2_ip = {
.gpuvm_enable = 1,
-   .gpuvm_max_page_table_levels = 1,
+   .gpuvm_max_page_table_levels = 4,
.hostvm_enable = 0,
.rob_buffer_size_kbytes = 128,
.det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
index 644f00f2def0..1a9bdfc35f2c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
@@ -124,7 +124,7 @@ static const struct IP_BASE DCN_BASE = { { { { 0x0012, 
0x00C0, 0x34C
 
 struct _vcs_dpi_ip_params_st dcn3_21_ip = {
.gpuvm_enable = 1,
-   .gpuvm_max_page_table_levels = 1,
+   .gpuvm_max_page_table_levels = 4,
.hostvm_enable = 0,
.rob_buffer_size_kbytes = 128,
.det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE,
-- 
2.25.1



[PATCH 13/40] drm/amd/display: Add SubVP control lock

2022-06-30 Thread Rodrigo Siqueira
Signed-off-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index db02f071c949..05c2e178ca99 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3201,15 +3201,19 @@ static void commit_planes_for_stream(struct dc *dc,
 
}
 
-#ifdef CONFIG_DRM_AMD_DC_DCN
if (update_type != UPDATE_TYPE_FAST)
if (dc->hwss.commit_subvp_config)
dc->hwss.commit_subvp_config(dc, context);
-#endif
-   if (should_lock_all_pipes && 
dc->hwss.interdependent_update_lock)
+
+   if (should_lock_all_pipes && 
dc->hwss.interdependent_update_lock) {
dc->hwss.interdependent_update_lock(dc, context, false);
-   else
+   if (dc->hwss.subvp_pipe_control_lock)
+   dc->hwss.subvp_pipe_control_lock(dc, context, 
false, should_lock_all_pipes, NULL, subvp_prev_use);
+   } else {
dc->hwss.pipe_control_lock(dc, top_pipe_to_program, 
false);
+   if (dc->hwss.subvp_pipe_control_lock)
+   dc->hwss.subvp_pipe_control_lock(dc, context, 
false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use);
+   }
 
if ((update_type != UPDATE_TYPE_FAST) && 
stream->update_flags.bits.dsc_changed)
if 
(top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
-- 
2.25.1



[PATCH 08/40] drm/amd/display: Switch to correct DTO on HDMI

2022-06-30 Thread Rodrigo Siqueira
From: Chris Park 

[Why]
For Pixel Rate control, when on HDMI, HDMI DTO should be selected
instead of DP DTO.

[How]
Pass HDMI parameter for HDMI stream, and select correct DTO.

Signed-off-by: Chris Park 
---
 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 2 ++
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c| 4 
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index 4e8059f20007..a49e84d58892 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -318,6 +318,8 @@ static void dcn32_update_clocks_update_dtb_dto(struct 
clk_mgr_internal *clk_mgr,
if (pipe_ctx->stream_res.audio != NULL)
dto_params.req_audio_dtbclk_khz = 24000;
}
+   if (dc_is_hdmi_signal(pipe_ctx->stream->signal))
+   dto_params.is_hdmi = true;
 
dccg->funcs->set_dtbclk_dto(clk_mgr->dccg, &dto_params);
//dccg->funcs->set_audio_dtbclk_dto(clk_mgr->dccg, 
&dto_params);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
index 152a76ad7957..12fc3afd9acd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
@@ -171,6 +171,9 @@ void dccg32_set_dtbclk_dto(
REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst],
DTBCLK_DTO_ENABLE[params->otg_inst], 0,
PIPE_DTO_SRC_SEL[params->otg_inst], 1);
+   if (params->is_hdmi)
+   REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+   PIPE_DTO_SRC_SEL[params->otg_inst], 0);
 
REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0);
REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0);
@@ -188,6 +191,7 @@ static void dccg32_set_valid_pixel_rate(
dto_params.ref_dtbclk_khz = ref_dtbclk_khz;
dto_params.otg_inst = otg_inst;
dto_params.pixclk_khz = pixclk_khz;
+   dto_params.is_hdmi = true;
 
dccg32_set_dtbclk_dto(dccg, &dto_params);
 }
-- 
2.25.1



[PATCH 10/40] drm/amd/display: Make OPTC3 function accessible to other DCN

2022-06-30 Thread Rodrigo Siqueira
From: Alvin Lee 

[Why]
Newer DCN should use optc3

[How]
Declare optc3 vmin/vmax function in header.

Signed-off-by: Alvin Lee 
---
 drivers/gpu/drm/amd/display/dc/dc.h   | 1 +
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c | 5 +
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c | 2 +-
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c | 2 +-
 4 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 7fa46e35bac6..629cd76b97c2 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -440,6 +440,7 @@ struct dc_clocks {
bool prev_p_state_change_support;
bool fclk_prev_p_state_change_support;
int num_ways;
+   bool fw_based_mclk_switching;
int prev_num_ways;
enum dtm_pstate dtm_level;
int max_supported_dppclk_khz;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
index 9a440ae8f865..80136b5d7e48 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
@@ -293,6 +293,11 @@ static void optc3_set_timing_double_buffer(struct 
timing_generator *optc, bool e
   OTG_DRR_TIMING_DBUF_UPDATE_MODE, mode);
 }
 
+void optc3_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, 
int vtotal_max)
+{
+   optc1_set_vtotal_min_max(optc, vtotal_min, vtotal_max);
+}
+
 void optc3_tg_init(struct timing_generator *optc)
 {
optc3_set_timing_double_buffer(optc, true);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
index 30361ebe7d22..c279a25ea293 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
@@ -58,7 +58,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = {
.pipe_control_lock = dcn20_pipe_control_lock,
.interdependent_update_lock = dcn10_lock_all_pipes,
.cursor_lock = dcn10_cursor_lock,
-   .prepare_bandwidth = dcn20_prepare_bandwidth,
+   .prepare_bandwidth = dcn30_prepare_bandwidth,
.optimize_bandwidth = dcn20_optimize_bandwidth,
.update_bandwidth = dcn20_update_bandwidth,
.set_drr = dcn10_set_drr,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
index 88275ea4193c..00ff21458a53 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
@@ -223,7 +223,7 @@ static struct timing_generator_funcs dcn32_tg_funcs = {
.set_vrr_m_const = optc3_set_vrr_m_const,
.set_drr = optc1_set_drr,
.get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal,
-   .set_vtotal_min_max = optc1_set_vtotal_min_max,
+   .set_vtotal_min_max = optc3_set_vtotal_min_max,
.set_static_screen_control = optc1_set_static_screen_control,
.program_stereo = optc1_program_stereo,
.is_stereo_left_eye = optc1_is_stereo_left_eye,
-- 
2.25.1



[PATCH 07/40] drm/amd/display: Change DET policy for MPO cases

2022-06-30 Thread Rodrigo Siqueira
From: Alvin Lee 

For MPO we want to allocate less than maximum DET for MPO pipes because
we need enough buffer to move DET back to other pipes when removing an
MPO plane. Also update regular DET allocation to use DET override (DCN32
has an internal policy which driver does not want to use)

Signed-off-by: Alvin Lee 
---
 .../drm/amd/display/dc/dcn32/dcn32_resource.c | 34 ---
 .../drm/amd/display/dc/dml/display_mode_vba.c |  2 +-
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 468c18acfcdc..63227c55a2f0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -3045,31 +3045,35 @@ int dcn32_populate_dml_pipes_from_context(
pipe_cnt++;
}
 
+   /* For DET allocation, we don't want to use DML policy (not optimal for 
utilizing all
+* the DET available for each pipe). Use the DET override input to 
maintain our driver
+* policy.
+*/
switch (pipe_cnt) {
case 1:
-   context->bw_ctx.dml.ip.det_buffer_size_kbytes = 
DCN3_2_MAX_DET_SIZE;
+   pipes[0].pipe.src.det_size_override = DCN3_2_MAX_DET_SIZE;
if (pipe->plane_state && !dc->debug.disable_z9_mpc) {
if (!is_dual_plane(pipe->plane_state->format)) {
-   context->bw_ctx.dml.ip.det_buffer_size_kbytes = 
DCN3_2_DEFAULT_DET_SIZE;
+   pipes[0].pipe.src.det_size_override = 
DCN3_2_DEFAULT_DET_SIZE;
pipes[0].pipe.src.unbounded_req_mode = true;
if (pipe->plane_state->src_rect.width >= 5120 &&
pipe->plane_state->src_rect.height >= 
2880)
-   
context->bw_ctx.dml.ip.det_buffer_size_kbytes = 320; // 5K or higher
+   pipes[0].pipe.src.det_size_override = 
320; // 5K or higher
}
}
break;
case 2:
-   context->bw_ctx.dml.ip.det_buffer_size_kbytes = 
DCN3_2_MAX_DET_SIZE / 2; // 576 KB (9 segments)
-   break;
case 3:
-   context->bw_ctx.dml.ip.det_buffer_size_kbytes = 
DCN3_2_MAX_DET_SIZE / 3; // 384 KB (6 segments)
-   break;
case 4:
-   default:
-   context->bw_ctx.dml.ip.det_buffer_size_kbytes = 
DCN3_2_DEFAULT_DET_SIZE; // 256 KB (4 segments)
+   // For 2 and 3 pipes, use (MAX_DET_SIZE / pipe_cnt), for 4 
pipes use default size for each pipe
+   for (i = 0; i < pipe_cnt; i++) {
+   pipes[i].pipe.src.det_size_override = (pipe_cnt < 4) ? 
(DCN3_2_MAX_DET_SIZE / pipe_cnt) : DCN3_2_DEFAULT_DET_SIZE;
+   }
break;
}
 
+   dcn32_update_det_override_for_mpo(dc, context, pipes);
+
return pipe_cnt;
 }
 
@@ -3365,8 +3369,8 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct 
dc_state *context, display
context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0;
context->res_ctx.pipe_ctx[i].unbounded_req = false;
} else {
-   context->res_ctx.pipe_ctx[i].det_buffer_size_kb =
-   
context->bw_ctx.dml.ip.det_buffer_size_kbytes;
+   context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 
get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt,
+   pipe_idx);
context->res_ctx.pipe_ctx[i].unbounded_req = 
pipes[pipe_idx].pipe.src.unbounded_req_mode;
}
if (context->bw_ctx.bw.dcn.clk.dppclk_khz < 
pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
@@ -3383,8 +3387,12 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct 
dc_state *context, display
context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = 
context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz
* 1000;
 
-   context->bw_ctx.bw.dcn.compbuf_size_kb = 
context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes
-   - context->bw_ctx.dml.ip.det_buffer_size_kbytes * 
pipe_idx;
+   context->bw_ctx.bw.dcn.compbuf_size_kb = 
context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes;
+
+   for (i = 0; i < dc->res_pool->pipe_count; i++) {
+   if (context->res_ctx.pipe_ctx[i].stream)
+   context->bw_ctx.bw.dcn.compbuf_size_kb -= 
context->res_ctx.pipe_ctx[i].det_buffer_size_kb;
+   }
 
for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index 

[PATCH 12/40] drm/amd/display: Fix stream->link_enc unassigned during stream removal

2022-06-30 Thread Rodrigo Siqueira
From: Nicholas Kazlauskas 

[Why]
Found when running igt@kms_atomic.

Userspace attempts to do a TEST_COMMIT with 0 streams, which calls
dc_remove_stream_from_ctx. This in turn calls link_enc_unassign which
ends up modifying stream->link = NULL directly, causing the global
link_enc to be removed preventing further link activity and future link
validation from passing.

[How]
We take care of link_enc unassignment at the start of
link_enc_cfg_link_encs_assign so this call is no longer necessary.

Fixes global state from being modified while unlocked.

Signed-off-by: Nicholas Kazlauskas 
---
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 28803ca9e3f2..332110bb1286 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -1994,9 +1994,6 @@ enum dc_status dc_remove_stream_from_ctx(
dc->res_pool,
del_pipe->stream_res.stream_enc,
false);
-   /* Release link encoder from stream in new dc_state. */
-   if (dc->res_pool->funcs->link_enc_unassign)
-   dc->res_pool->funcs->link_enc_unassign(new_ctx, 
del_pipe->stream);
 
if (is_dp_128b_132b_signal(del_pipe)) {
update_hpo_dp_stream_engine_usage(
-- 
2.25.1



[PATCH 03/40] drm/amd/display: Prepare for new interfaces

2022-06-30 Thread Rodrigo Siqueira
From: Martin Leung 

[WHY]:
The LUT pipeline will be hooked up differently in some ASICs, so we
need to add new interfaces and missing registers.

[HOW]:
Add missing registers and hook up programming from DPP for pre-blend
lut.

Acked-by: Rodrigo Siqueira 
Signed-off-by: Martin Leung 
---
 .../gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h  | 106 +++-
 .../drm/amd/display/dc/dcn32/dcn32_hwseq.c|  91 +++
 .../drm/amd/display/dc/dcn32/dcn32_hwseq.h|   7 +
 .../gpu/drm/amd/display/dc/dcn32/dcn32_init.c |   5 +-
 .../gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c  | 236 +-
 .../gpu/drm/amd/display/dc/dcn32/dcn32_mpc.h  | 117 -
 .../drm/amd/display/dc/dcn32/dcn32_resource.c |  14 +-
 .../amd/display/dc/dcn321/dcn321_resource.c   |  16 +-
 8 files changed, 564 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h
index a4d8f77d43bc..c8a3a6a96ff7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h
@@ -283,6 +283,7 @@
uint32_t MPC_OUT_CSC_COEF_FORMAT
 
 #define MPC_REG_VARIABLE_LIST_DCN32 \
+   uint32_t MPCC_MOVABLE_CM_LOCATION_CONTROL[MAX_MPCC]; \
uint32_t MPCC_MCM_SHAPER_CONTROL[MAX_MPCC]; \
uint32_t MPCC_MCM_SHAPER_OFFSET_R[MAX_MPCC]; \
uint32_t MPCC_MCM_SHAPER_OFFSET_G[MAX_MPCC]; \
@@ -347,6 +348,80 @@
uint32_t MPCC_MCM_3DLUT_OUT_OFFSET_R[MAX_MPCC]; \
uint32_t MPCC_MCM_3DLUT_OUT_OFFSET_G[MAX_MPCC]; \
uint32_t MPCC_MCM_3DLUT_OUT_OFFSET_B[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_CONTROL[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_LUT_INDEX[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_LUT_DATA[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_LUT_CONTROL[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_START_CNTL_B[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_START_CNTL_G[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_START_CNTL_R[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_START_SLOPE_CNTL_B[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_START_SLOPE_CNTL_G[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_START_SLOPE_CNTL_R[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_START_BASE_CNTL_B[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_START_BASE_CNTL_G[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_START_BASE_CNTL_R[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_END_CNTL1_B[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_END_CNTL2_B[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_END_CNTL1_G[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_END_CNTL2_G[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_END_CNTL1_R[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_END_CNTL2_R[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_OFFSET_B[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_OFFSET_G[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_OFFSET_R[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_0_1[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_2_3[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_4_5[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_6_7[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_8_9[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_10_11[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_12_13[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_14_15[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_16_17[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_18_19[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_20_21[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_22_23[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_24_25[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_26_27[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_28_29[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_30_31[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMA_REGION_32_33[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMB_START_CNTL_B[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMB_START_CNTL_G[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMB_START_CNTL_R[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMB_START_SLOPE_CNTL_B[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMB_START_SLOPE_CNTL_G[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMB_START_SLOPE_CNTL_R[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMB_START_BASE_CNTL_B[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMB_START_BASE_CNTL_G[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMB_START_BASE_CNTL_R[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMB_END_CNTL1_B[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMB_END_CNTL2_B[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMB_END_CNTL1_G[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMB_END_CNTL2_G[MAX_MPCC]; \
+   uint32_t MPCC_MCM_1DLUT_RAMB_END_CNTL1_R[MAX_MPCC]; \
+   uint32_t 

[PATCH 04/40] drm/amd/display: Add function to set pixels per cycle

2022-06-30 Thread Rodrigo Siqueira
From: Eric Bernstein 

Add function to set pixels per cycle in DIG stream encoder

Acked-by: Rodrigo Siqueira 
Signed-off-by: Eric Bernstein 
---
 .../amd/display/dc/dcn10/dcn10_stream_encoder.h |  1 +
 .../gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c  |  5 ++---
 .../display/dc/dcn32/dcn32_dio_stream_encoder.c |  4 ++--
 .../display/dc/dcn32/dcn32_dio_stream_encoder.h |  1 +
 .../gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c  | 17 +
 .../gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h  |  2 ++
 .../gpu/drm/amd/display/dc/dcn32/dcn32_init.c   |  1 +
 .../amd/display/dc/inc/hw_sequencer_private.h   |  1 +
 8 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
index f8d22ba6a6e4..aa4f41745be4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
@@ -577,6 +577,7 @@ struct dcn10_stream_enc_registers {
 
 #define SE_REG_FIELD_LIST_DCN3_2(type) \
type DIG_FIFO_OUTPUT_PIXEL_MODE;\
+   type DP_PIXEL_PER_CYCLE_PROCESSING_MODE;\
type DIG_SYMCLK_FE_ON;\
type DIG_FIFO_READ_START_LEVEL;\
type DIG_FIFO_ENABLE;\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index 9f07c1b170c9..2286cc34e9cc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -2535,9 +2535,8 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
 
tg->funcs->set_early_control(tg, early_control);
 
-   if (pipe_ctx->stream_res.stream_enc->funcs->set_input_mode)
-   
pipe_ctx->stream_res.stream_enc->funcs->set_input_mode(pipe_ctx->stream_res.stream_enc,
-   timing->pixel_encoding == PIXEL_ENCODING_YCBCR420 ? 2 : 
1);
+   if (dc->hwseq->funcs.set_pixels_per_cycle)
+   dc->hwseq->funcs.set_pixels_per_cycle(pipe_ctx);
 
/* enable audio only within mode set */
if (pipe_ctx->stream_res.audio != NULL) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c
index 4d7588f2ee79..f16c4fcdf9e9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c
@@ -54,9 +54,9 @@ static void enc32_dp_set_odm_combine(
struct stream_encoder *enc,
bool odm_combine)
 {
-   //struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+   struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
 
-   //TODO: REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_COMBINE, odm_combine);
+   REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_PER_CYCLE_PROCESSING_MODE, 
odm_combine ? 1 : 0);
 }
 
 /* setup stream encoder in dvi mode */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.h 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.h
index 042bc9aca944..250d9a341cf6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.h
@@ -96,6 +96,7 @@
 #define SE_COMMON_MASK_SH_LIST_DCN32_BASE(mask_sh)\
SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, mask_sh),\
SE_SF(DP0_DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, mask_sh),\
+   SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_PER_CYCLE_PROCESSING_MODE, 
mask_sh),\
SE_SF(DIG0_HDMI_CONTROL, HDMI_PACKET_GEN_VERSION, mask_sh),\
SE_SF(DIG0_HDMI_CONTROL, HDMI_KEEPOUT_MODE, mask_sh),\
SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, mask_sh),\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
index 49dd9e0edcb1..01a95de80138 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
@@ -1112,3 +1112,20 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct 
pipe_ctx *pipe_ctx, unsign
 
return odm_combine_factor;
 }
+
+void dcn32_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx)
+{
+   uint32_t pix_per_cycle = 1;
+   uint32_t odm_combine_factor = 1;
+
+   if (!pipe_ctx || !pipe_ctx->stream || !pipe_ctx->stream_res.stream_enc)
+   return;
+
+   odm_combine_factor = get_odm_config(pipe_ctx, NULL);
+   if (optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing) || 
odm_combine_factor > 1)
+   pix_per_cycle = 2;
+
+   if (pipe_ctx->stream_res.stream_enc->funcs->set_input_mode)
+   
pipe_ctx->stream_res.stream_enc->funcs->set_input_mode(pipe_ctx->stream_res.stream_enc,
+   pix_per_cycle);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h
index 

[PATCH 06/40] drm/amd/display: Use two pixel per container for k1/k2 div

2022-06-30 Thread Rodrigo Siqueira
Currently, we check if pixel_encoding is equal to
PIXEL_ENCODING_YCBCR422 to get the k1/k2 div parameters. This commit
changes this logic slightly by checking if two pixels per container are
used.

Signed-off-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
index 7da994d8cde3..51958573a2f9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
@@ -1082,6 +1082,7 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct 
pipe_ctx *pipe_ctx, unsign
struct dc_stream_state *stream = pipe_ctx->stream;
unsigned int odm_combine_factor = 0;
struct dc *dc = pipe_ctx->stream->ctx->dc;
+   bool two_pix_per_container = 
optc2_is_two_pixels_per_containter(&stream->timing);
 
odm_combine_factor = get_odm_config(pipe_ctx, NULL);
 
@@ -1094,13 +1095,12 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct 
pipe_ctx *pipe_ctx, unsign
else
*k2_div = PIXEL_RATE_DIV_BY_4;
} else if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
-   if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) {
+   if (two_pix_per_container) {
*k1_div = PIXEL_RATE_DIV_BY_1;
*k2_div = PIXEL_RATE_DIV_BY_2;
-   } else if (stream->timing.pixel_encoding == 
PIXEL_ENCODING_YCBCR422) {
-   *k1_div = PIXEL_RATE_DIV_BY_2;
-   *k2_div = PIXEL_RATE_DIV_BY_2;
} else {
+   *k1_div = PIXEL_RATE_DIV_BY_1;
+   *k2_div = PIXEL_RATE_DIV_BY_4;
if ((odm_combine_factor == 2) || 
dc->debug.enable_dp_dig_pixel_rate_div_policy)
*k2_div = PIXEL_RATE_DIV_BY_2;
}
-- 
2.25.1



[PATCH 05/40] drm/amd/display: Apply ODM 2:1 policy for single display configuration

2022-06-30 Thread Rodrigo Siqueira
From: Samson Tam 

[Why]
Most of the time, a single display uses the ODM combine. When using
multi-display, we use ODM combine only if it is necessary. These cases
are not flexible enough for us, and we can improve them to take
advantage of our hardware. We want to have more control over the ODM
policy.

[How]
This commit adds a new debug flag named
enable_single_display_2to1_odm_policy to control the ODM policy and
another flag named enable_dp_dig_pixel_rate_div_policy for finer control
of the ODM combine. This is made possible by adding a new "pipe.dest"
parameter that can be set to ODM 2:1 combine if we use a single display.
For dynamic ODM combine, when using DP-DIG, DCN applies K2=2 settings for
ODM combine. Note that this feature affects the following registers:

- timing.pix_clk_100khz -> DP_VID_M, DP_VID_N
- requested_pix_clk_100hz -> DP_DTOn_PHASE
- OTGn_PIXEL_RATE_DIVK2
- DP_PIXEL_PER_CYCLE_PROCESSING_MODE
- DIG_FIFO_OUTPUT_PIXEL_MODE
- DP_VID_N_MUL

Acked-by: Rodrigo Siqueira 
Signed-off-by: Samson Tam 
---
 drivers/gpu/drm/amd/display/dc/dc.h   |  2 +
 .../drm/amd/display/dc/dcn20/dcn20_resource.c |  7 +++
 .../dc/dcn32/dcn32_dio_stream_encoder.c   |  4 +-
 .../drm/amd/display/dc/dcn32/dcn32_hwseq.c| 58 +--
 .../drm/amd/display/dc/dcn32/dcn32_hwseq.h|  5 ++
 .../gpu/drm/amd/display/dc/dcn32/dcn32_init.c |  3 +-
 .../drm/amd/display/dc/dcn32/dcn32_resource.c |  6 ++
 .../amd/display/dc/dml/display_mode_structs.h |  1 +
 .../drm/amd/display/dc/dml/display_mode_vba.c |  3 +-
 .../amd/display/dc/inc/hw_sequencer_private.h |  1 +
 10 files changed, 83 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 447647ac3d80..7fa46e35bac6 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -748,6 +748,8 @@ struct dc_debug_options {
enum dml_hostvm_override_opts dml_hostvm_override;
bool use_legacy_soc_bb_mechanism;
bool exit_idle_opt_for_cursor_updates;
+   bool enable_single_display_2to1_odm_policy;
+   bool enable_dp_dig_pixel_rate_div_policy;
 };
 
 struct gpu_info_soc_bounding_box_v1_0;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 7802d603f796..0b1ef76e3268 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -1237,6 +1237,8 @@ static void get_pixel_clock_parameters(
int opp_cnt = 1;
struct dc_link *link = stream->link;
struct link_encoder *link_enc = NULL;
+   struct dc *dc = pipe_ctx->stream->ctx->dc;
+   struct dce_hwseq *hws = dc->hwseq;
 
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = 
odm_pipe->next_odm_pipe)
opp_cnt++;
@@ -1268,6 +1270,11 @@ static void get_pixel_clock_parameters(
else if (optc2_is_two_pixels_per_containter(>timing) || opp_cnt 
== 2)
pixel_clk_params->requested_pix_clk_100hz /= 2;
 
+   else if (hws->funcs.is_dp_dig_pixel_rate_div_policy) {
+   if (hws->funcs.is_dp_dig_pixel_rate_div_policy(pipe_ctx))
+   pixel_clk_params->requested_pix_clk_100hz /= 2;
+   }
+
if (stream->timing.timing_3d_format == 
TIMING_3D_FORMAT_HW_FRAME_PACKING)
pixel_clk_params->requested_pix_clk_100hz *= 2;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c
index f16c4fcdf9e9..da7d2243664f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c
@@ -249,6 +249,7 @@ static void enc32_stream_encoder_dp_unblank(
const struct encoder_unblank_param *param)
 {
struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+   struct dc *dc = enc->ctx->dc;
 
if (param->link_settings.link_rate != LINK_RATE_UNKNOWN) {
uint32_t n_vid = 0x8000;
@@ -257,7 +258,8 @@ static void enc32_stream_encoder_dp_unblank(
uint64_t m_vid_l = n_vid;
 
/* YCbCr 4:2:0 : Computed VID_M will be 2X the input rate */
-   if (is_two_pixels_per_containter(>timing) || 
param->opp_cnt > 1) {
+   if (is_two_pixels_per_containter(&param->timing) || 
param->opp_cnt > 1
+   || dc->debug.enable_dp_dig_pixel_rate_div_policy) {
/*this logic should be the same in 
get_pixel_clock_parameters() */
n_multiply = 1;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
index 01a95de80138..7da994d8cde3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
@@ -1081,6 +1081,7 @@ 

[PATCH 01/40] drm/amd/display: Add missing registers for ACP

2022-06-30 Thread Rodrigo Siqueira
We are missing some ACP register shift/mask values for some specific ASICs.
This commit adds them for the ASICs that support them.

Signed-off-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h | 2 ++
 drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_1_sh_mask.h | 2 ++
 drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_sh_mask.h | 2 ++
 drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_sh_mask.h | 2 ++
 drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h | 2 ++
 5 files changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h 
b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h
index ea683f452bb3..b79be3a25a80 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h
@@ -38746,12 +38746,14 @@
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT  
   0x5
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT
   0x8
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT
   0x9
+#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 
   0xc
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT
   0x10
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK  
   0x0001L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK
   0x0010L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK
   0x0020L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK  
   0x0100L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK  
   0x0200L
+#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK   
   0x1000L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK  
   0x003FL
 //DIG0_HDMI_INFOFRAME_CONTROL0
 #define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT  
   0x4
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_1_sh_mask.h 
b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_1_sh_mask.h
index 59155007c186..e454d4469f17 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_1_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_1_sh_mask.h
@@ -29361,12 +29361,14 @@
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT  
   0x5
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT
   0x8
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT
   0x9
+#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 
   0xc
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT
   0x10
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK  
   0x0001L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK
   0x0010L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK
   0x0020L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK  
   0x0100L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK  
   0x0200L
+#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK   
   0x1000L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK  
   0x003FL
 //DIG0_HDMI_INFOFRAME_CONTROL0
 #define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT  
   0x4
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_sh_mask.h 
b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_sh_mask.h
index 396c33fafc91..b9de0ebc8b03 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_sh_mask.h
@@ -33955,12 +33955,14 @@
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT  
   0x5
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT
   0x8
 #define 

[PATCH 00/40] DC Patches Jun 30, 2022

2022-06-30 Thread Rodrigo Siqueira
This DC patchset is a large one that brings improvements in multiple
areas. In summary, we highlight:

- Program ACP-related registers
- Fixes for DMUB, DPIA, PSR, and others
- Improvements in the pipe split
- Add SubVP code
- Add basic setup for FAMS support
- Improve BB capabilities

Cc: Daniel Wheeler 

Thanks
Siqueira

Alan Liu (1):
  drm/amd/display: Program ACP related register

Alvin Lee (5):
  drm/amd/display: Add SubVP required code
  drm/amd/display: Change DET policy for MPO cases
  drm/amd/display: Make OPTC3 function accessible to other DCN
  drm/amd/display: Don't set dram clock change requirement for SubVP
  drm/amd/display: Maintain old audio programming sequence

Aric Cyr (1):
  drm/amd/display: 3.2.192

Chris Park (3):
  drm/amd/display: Switch to correct DTO on HDMI
  drm/amd/display: Indicate stream change on ODM change
  drm/amd/display: OVT Update on InfoFrame and Mode Management

Dmytro Laktyushkin (2):
  drm/amd/display: disable timing sync b/w odm halves
  drm/amd/display: disable otg toggle w/a on boot

Duncan Ma (1):
  drm/amd/display: Add flag to modify MST delay

Eric Bernstein (3):
  drm/amd/display: Add function to set pixels per cycle
  drm/amd/display: Update gpuvm_max_page_table_levels IP param
  drm/amd/display: Fix null timing generator resource

Evgenii Krasnikov (1):
  drm/amd/display: add an option to skip wait for HPD when powering on
eDP panel

Fangzhi Zuo (1):
  drm/amd/display: Fix dmub soft hang for PSR 1

Hamza Mahfooz (2):
  drm/amd/display: enable PCON SST support for newer ASICs
  drm/amd/display: rename hdmi_frl_pcon_support

Harry Wentland (1):
  drm/amd/display: Move all linux includes into OS types

Jimmy Kizito (3):
  drm/amd/display: Maintain consistent mode of operation during encoder
assignment
  drm/amd/display: Disable TBT3 DSC work around by default.
  drm/amd/display: Fix uninitialized variable.

Jun Lei (1):
  drm/amd/display: Extend soc BB capabilitiy

Martin Leung (2):
  drm/amd/display: Prepare for new interfaces
  drm/amd/display: guard for virtual calling destroy_link_encoders

Meenakshikumar Somasundaram (1):
  drm/amd/display: Remove configuration option for dpia hpd delay

Michael Strauss (1):
  drm/amd/display: Initialize lt_settings on instantiation

Nicholas Kazlauskas (4):
  drm/amd/display: Fix stream->link_enc unassigned during stream removal
  drm/amd/display: Guard against ddc_pin being NULL for AUX
  drm/amd/display: Remove incorrect ASSERT check for link_enc
  drm/amd/display: Guard against NULL link encoder in log hw state

Rodrigo Siqueira (6):
  drm/amd/display: Add missing registers for ACP
  drm/amd/display: Use two pixel per container for k1/k2 div
  drm/amd/display: Add basic infrastructure for enabling FAMS
  drm/amd/display: Add SubVP control lock
  drm/amd/display: Add minimal pipe split transition state
  drm/amd/display: Fix refresh rate issue on Club 3D

Samson Tam (1):
  drm/amd/display: Apply ODM 2:1 policy for single display configuration

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  10 +-
 drivers/gpu/drm/amd/display/dc/Makefile   |  27 +-
 .../gpu/drm/amd/display/dc/basics/vector.c|   2 -
 .../drm/amd/display/dc/bios/bios_parser2.c|   2 -
 .../gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c  |   6 +
 .../dc/clk_mgr/dcn315/dcn315_clk_mgr.c|   7 +-
 .../display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c  |   2 +
 .../dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c  |   5 +-
 drivers/gpu/drm/amd/display/dc/core/dc.c  | 384 +-
 .../drm/amd/display/dc/core/dc_hw_sequencer.c |   2 -
 .../gpu/drm/amd/display/dc/core/dc_link_ddc.c |   2 -
 .../gpu/drm/amd/display/dc/core/dc_link_dp.c  |   9 +-
 .../drm/amd/display/dc/core/dc_link_dpia.c|  58 +-
 .../drm/amd/display/dc/core/dc_link_enc_cfg.c |  32 +
 .../gpu/drm/amd/display/dc/core/dc_resource.c |  32 +-
 drivers/gpu/drm/amd/display/dc/core/dc_sink.c |   2 -
 .../gpu/drm/amd/display/dc/core/dc_stream.c   |   3 -
 .../gpu/drm/amd/display/dc/core/dc_surface.c  |   2 -
 drivers/gpu/drm/amd/display/dc/dc.h   |  27 +-
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  | 416 +++
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h  |   4 +
 drivers/gpu/drm/amd/display/dc/dc_hw_types.h  |   2 +
 drivers/gpu/drm/amd/display/dc/dc_stream.h|  24 +-
 drivers/gpu/drm/amd/display/dc/dc_types.h |   5 +
 .../gpu/drm/amd/display/dc/dce/dce_audio.c|  13 +-
 drivers/gpu/drm/amd/display/dc/dce/dce_aux.c  |   8 +-
 .../drm/amd/display/dc/dce/dce_clock_source.c |   2 -
 drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c |   3 -
 .../gpu/drm/amd/display/dc/dce/dce_i2c_sw.c   |   2 -
 drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c  |   2 -
 .../drm/amd/display/dc/dce/dce_link_encoder.c |   3 -
 drivers/gpu/drm/amd/display/dc/dce/dce_opp.c  |   2 -
 .../amd/display/dc/dce/dce_stream_encoder.c   |   5 +-
 .../amd/display/dc/dce/dce_stream_encoder.h   |  14 +-
 .../amd/display/dc/dce100/dce100_resource.c   |   2 -
 .../amd/display/dc/dce110/dce110_compressor.c 

[PATCH 5/5] libhsakmt: allocate unified memory for ctx save restore area

2022-06-30 Thread Eric Huang
To improve performance on queue preemption, allocate the context
save/restore (ctx s/r) area in VRAM instead of system memory, and
migrate it back to system memory when VRAM is full.

Signed-off-by: Eric Huang 
Change-Id: If775782027188dbe84b6868260e429373675434c
---
 include/hsakmttypes.h |   1 +
 src/queues.c  | 103 --
 2 files changed, 90 insertions(+), 14 deletions(-)

diff --git a/include/hsakmttypes.h b/include/hsakmttypes.h
index 9063f85..2c1c7cc 100644
--- a/include/hsakmttypes.h
+++ b/include/hsakmttypes.h
@@ -1329,6 +1329,7 @@ typedef enum _HSA_SVM_FLAGS {
HSA_SVM_FLAG_GPU_RO  = 0x0008, // GPUs only read, allows 
replication
HSA_SVM_FLAG_GPU_EXEC= 0x0010, // Allow execution on GPU
HSA_SVM_FLAG_GPU_READ_MOSTLY = 0x0020, // GPUs mostly read, may 
allow similar optimizations as RO, but writes fault
+   HSA_SVM_FLAG_GPU_ALWAYS_MAPPED = 0x0040, // Keep GPU memory mapping 
always valid as if XNACK is disable
 } HSA_SVM_FLAGS;
 
 typedef enum _HSA_SVM_ATTR_TYPE {
diff --git a/src/queues.c b/src/queues.c
index c83dd93..d5109f9 100644
--- a/src/queues.c
+++ b/src/queues.c
@@ -68,6 +68,7 @@ struct queue {
uint32_t eop_buffer_size;
uint32_t gfxv;
bool use_ats;
+   bool unified_ctx_save_restore;
/* This queue structure is allocated from GPU with page aligned size
 * but only small bytes are used. We use the extra space in the end for
 * cu_mask bits array.
@@ -383,13 +384,47 @@ static void free_exec_aligned_memory(void *addr, uint32_t 
size, uint32_t align,
munmap(addr, size);
 }
 
+static HSAKMT_STATUS register_svm_range(void *mem, uint32_t size,
+   uint32_t gpuNode, uint32_t prefetchNode,
+   uint32_t preferredNode, bool alwaysMapped)
+{
+   HSA_SVM_ATTRIBUTE *attrs;
+   HSAuint64 s_attr;
+   HSAuint32 nattr;
+   HSAuint32 flags;
+
+   flags = HSA_SVM_FLAG_HOST_ACCESS;
+
+   if (alwaysMapped)
+   flags |= HSA_SVM_FLAG_GPU_ALWAYS_MAPPED;
+
+   nattr = 5;
+   s_attr = sizeof(*attrs) * nattr;
+   attrs = (HSA_SVM_ATTRIBUTE *)alloca(s_attr);
+
+   attrs[0].type = HSA_SVM_ATTR_PREFETCH_LOC;
+   attrs[0].value = prefetchNode;
+   attrs[1].type = HSA_SVM_ATTR_PREFERRED_LOC;
+   attrs[1].value = preferredNode;
+   attrs[2].type = HSA_SVM_ATTR_CLR_FLAGS;
+   attrs[2].value = ~flags;
+   attrs[3].type = HSA_SVM_ATTR_SET_FLAGS;
+   attrs[3].value = flags;
+   attrs[4].type = HSA_SVM_ATTR_ACCESS;
+   attrs[4].value = gpuNode;
+
+   return hsaKmtSVMSetAttr(mem, size, nattr, attrs);
+}
+
 static void free_queue(struct queue *q)
 {
if (q->eop_buffer)
free_exec_aligned_memory(q->eop_buffer,
 q->eop_buffer_size,
 PAGE_SIZE, q->use_ats);
-   if (q->ctx_save_restore)
+   if (q->unified_ctx_save_restore)
+   free(q->ctx_save_restore);
+   else if (q->ctx_save_restore)
free_exec_aligned_memory(q->ctx_save_restore,
 q->ctx_save_restore_size,
 PAGE_SIZE, q->use_ats);
@@ -425,6 +460,8 @@ static int handle_concrete_asic(struct queue *q,
if (ret) {
uint32_t total_mem_alloc_size = 0;
HsaUserContextSaveAreaHeader *header;
+   HsaNodeProperties node;
+   bool svm_api;
 
args->ctx_save_restore_size = q->ctx_save_restore_size;
args->ctl_stack_size = q->ctl_stack_size;
@@ -434,22 +471,60 @@ static int handle_concrete_asic(struct queue *q,
 */
total_mem_alloc_size = q->ctx_save_restore_size +
   q->debug_memory_size;
-   q->ctx_save_restore =
-   allocate_exec_aligned_memory(total_mem_alloc_size,
-q->use_ats, NodeId, false, false);
 
-   if (!q->ctx_save_restore)
-   return HSAKMT_STATUS_NO_MEMORY;
+   if (hsaKmtGetNodeProperties(NodeId, &node))
+   svm_api = false;
+   else
+   svm_api = node.Capability.ui32.SVMAPISupported;
 
-   args->ctx_save_restore_address = (uintptr_t)q->ctx_save_restore;
+   /* Allocate unified memory for context save restore
+* area on dGPU.
+*/
+   if (!q->use_ats && svm_api) {
+   uint32_t size = PAGE_ALIGN_UP(total_mem_alloc_size);
+   void *addr;
+   HSAKMT_STATUS r = HSAKMT_STATUS_ERROR;
+
+   if (posix_memalign(&addr, GPU_HUGE_PAGE_SIZE, size))
+   pr_err("[%s] posix_memalign failed:\n", 
__func__);
+

[PATCH 4/5] libhsakmt: add new flags for svm

2022-06-30 Thread Eric Huang
Add a new SVM flag that requests the GPU mapping always be kept valid.

Signed-off-by: Eric Huang 
Change-Id: Iebee35e6de4d52fa29f82dd19f6bbf5640249492
---
 include/linux/kfd_ioctl.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/kfd_ioctl.h b/include/linux/kfd_ioctl.h
index 8a0ed49..5c45f58 100644
--- a/include/linux/kfd_ioctl.h
+++ b/include/linux/kfd_ioctl.h
@@ -1069,6 +1069,8 @@ struct kfd_ioctl_cross_memory_copy_args {
 #define KFD_IOCTL_SVM_FLAG_GPU_EXEC0x0010
 /* GPUs mostly read, may allow similar optimizations as RO, but writes fault */
 #define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY 0x0020
+/* Keep GPU memory mapping always valid as if XNACK is disable */
+#define KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED   0x0040
 
 /**
  * kfd_ioctl_svm_op - SVM ioctl operations
-- 
2.25.1



[PATCH 1/5] drm/amdkfd: add new flag for svm

2022-06-30 Thread Eric Huang
Add a new SVM flag that requests the GPU mapping always be kept valid.

Signed-off-by: Eric Huang 
---
 include/uapi/linux/kfd_ioctl.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index fd49dde4d5f4..eba04ebfd9a8 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -1076,6 +1076,8 @@ struct kfd_ioctl_cross_memory_copy_args {
 #define KFD_IOCTL_SVM_FLAG_GPU_EXEC0x0010
 /* GPUs mostly read, may allow similar optimizations as RO, but writes fault */
 #define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY 0x0020
+/* Keep GPU memory mapping always valid as if XNACK is disable */
+#define KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED   0x0040
 
 /**
  * kfd_ioctl_svm_op - SVM ioctl operations
-- 
2.25.1



[PATCH 3/5] drm/amdkfd: optimize svm range evict

2022-06-30 Thread Eric Huang
Avoid unnecessary queue eviction when the range
is not mapped to the GPU.

Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 586bef4fcc8a..1f1f8f2dfa28 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1775,8 +1775,12 @@ svm_range_evict(struct svm_range *prange, struct 
mm_struct *mm,
if (!p->xnack_enabled ||
(prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) {
int evicted_ranges;
+   bool mapped = prange->mapped_to_gpu;
 
list_for_each_entry(pchild, >child_list, child_list) {
+   if (!pchild->mapped_to_gpu)
+   continue;
+   mapped = true;
mutex_lock_nested(>lock, 1);
if (pchild->start <= last && pchild->last >= start) {
pr_debug("increment pchild invalid [0x%lx 
0x%lx]\n",
@@ -1786,6 +1790,9 @@ svm_range_evict(struct svm_range *prange, struct 
mm_struct *mm,
mutex_unlock(>lock);
}
 
+   if (!mapped)
+   return r;
+
if (prange->start <= last && prange->last >= start)
atomic_inc(>invalid);
 
-- 
2.25.1



[PATCH 2/5] drm/amdkfd: change svm range evict

2022-06-30 Thread Eric Huang
Always evict queues when the KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED
flag is set on the range, as if XNACK were off.

Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 4bf2f75f853b..586bef4fcc8a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1772,7 +1772,8 @@ svm_range_evict(struct svm_range *prange, struct 
mm_struct *mm,
pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
 svms, prange->start, prange->last, start, last);
 
-   if (!p->xnack_enabled) {
+   if (!p->xnack_enabled ||
+   (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) {
int evicted_ranges;
 
list_for_each_entry(pchild, >child_list, child_list) {
@@ -3321,7 +3322,8 @@ svm_range_set_attr(struct kfd_process *p, struct 
mm_struct *mm,
if (r)
goto out_unlock_range;
 
-   if (migrated && !p->xnack_enabled) {
+   if (migrated && (!p->xnack_enabled ||
+   (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED))) {
pr_debug("restore_work will update mappings of GPUs\n");
mutex_unlock(>migrate_mutex);
continue;
-- 
2.25.1



[PATCH 0/5] Unified memory for CWSR save restore area

2022-06-30 Thread Eric Huang
amdkfd changes:

Eric Huang (3):
  drm/amdkfd: add new flag for svm
  drm/amdkfd: change svm range evict
  drm/amdkfd: optimize svm range evict

 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 13 +++--
 include/uapi/linux/kfd_ioctl.h   |  2 ++
 2 files changed, 13 insertions(+), 2 deletions(-)

libhsakmt(thunk) changes:
which are based on https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface

Eric Huang (2):
  libhsakmt: add new flags for svm
  libhsakmt: allocate unified memory for ctx save restore area

 include/hsakmttypes.h |   1 +
 include/linux/kfd_ioctl.h |   2 +
 src/queues.c  | 109 +-
 3 files changed, 98 insertions(+), 14 deletions(-)

-- 
2.25.1



Re: [PATCH 0/4] Fix compilation issues when using i386

2022-06-30 Thread Alex Deucher
On Thu, Jun 30, 2022 at 2:46 PM Rodrigo Siqueira
 wrote:
>
> Fix compilation issues when using i386
>
> We recently got feedback from Randy about issues in the x86-32
> compilation. I was able to reproduce a very similar issue by using:
>
> - gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0
> - make -j16 ARCH=i386
> - amd-staging-drm-next
>
> I'm able to see these issues:
>
> ERROR: modpost: "__nedf2" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!
> ERROR: modpost: "__muldf3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!
> ERROR: modpost: "__floatunsidf" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] 
> undefined!
> ERROR: modpost: "__umoddi3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!
>
> This patchset addresses each compilation issue in its own patch since I
> want to have an easy way to bisect display code in case of regressions.
> Please, let me know if you see any issue with these changes and if it
> fixes the 32-bit compilation. If you still see compilation issues,
> please, report:
>
> - GCC version
> - Config file
> - Branch
>
> Thanks
> Siqueira
>
> Rodrigo Siqueira (4):
>   drm/amd/display: Fix __umoddi3 undefined for 32 bit compilation
>   drm/amd/display: Fix __floatunsidf undefined for 32 bit compilation
>   drm/amd/display: Fix __muldf3 undefined for 32 bit compilation
>   drm/amd/display: Fix __nedf2 undefined for 32 bit compilation

Series is:
Reviewed-by: Alex Deucher 

I think this patch is also relevant, if someone can review it as well:
https://patchwork.freedesktop.org/patch/491429/

Alex

>
>  .../amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c  | 15 +--
>  .../gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c|  4 +++-
>  drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h   |  2 +-
>  3 files changed, 13 insertions(+), 8 deletions(-)
>
> --
> 2.25.1
>


[PATCH] drm/amd/display: Remove return value of Calculate256BBlockSizes

2022-06-30 Thread Maíra Canal
The function Calculate256BBlockSizes always returns true, regardless of
the parameters. As no caller checks the return value of the function, this
commit changes the return type to void.

Signed-off-by: Maíra Canal 
---
 drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c | 3 +--
 drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index 2676710a5f2b..ed23c7c79d86 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -957,7 +957,7 @@ static void recalculate_params(
}
 }
 
-bool Calculate256BBlockSizes(
+void Calculate256BBlockSizes(
enum source_format_class SourcePixelFormat,
enum dm_swizzle_mode SurfaceTiling,
unsigned int BytePerPixelY,
@@ -995,7 +995,6 @@ bool Calculate256BBlockSizes(
*BlockWidth256BytesY = 256 / BytePerPixelY / 
*BlockHeight256BytesY;
*BlockWidth256BytesC = 256 / BytePerPixelC / 
*BlockHeight256BytesC;
}
-   return true;
 }
 
 bool CalculateMinAndMaxPrefetchMode(
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 10ff536ef2a4..25a9a606ab6f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -169,7 +169,7 @@ bool get_is_phantom_pipe(struct display_mode_lib *mode_lib,
unsigned int pipe_idx);
 void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct display_mode_lib 
*mode_lib);
 
-bool Calculate256BBlockSizes(
+void Calculate256BBlockSizes(
enum source_format_class SourcePixelFormat,
enum dm_swizzle_mode SurfaceTiling,
unsigned int BytePerPixelY,
-- 
2.36.1



[PATCH 2/4] drm/amd/display: Fix __floatunsidf undefined for 32 bit compilation

2022-06-30 Thread Rodrigo Siqueira
When we tried to compile DCN32/321 for 32-bit architecture, we got this
error message:

ERROR: modpost: "__floatunsidf" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] 
undefined!

This was caused by assigning an unsigned int to a double value, which
causes issues on 32-bit architectures. This issue can be fixed by
changing the value type.

Cc: Aurabindo Pillai 
Cc: Harry Wentland 
Cc: Alex Deucher 
Cc: Randy Dunlap 
Fixes: 9b79abf79c414 ("drm/amd/display: add CLKMGR changes for DCN32/321")
Reported-by: Stephen Rothwell 
Signed-off-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h 
b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
index 9ae9439c8f7b..5d2b028e5dad 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
@@ -208,7 +208,7 @@ struct wm_table {
 
 struct dummy_pstate_entry {
unsigned int dram_speed_mts;
-   unsigned int dummy_pstate_latency_us;
+   double dummy_pstate_latency_us;
 };
 
 struct clk_bw_params {
-- 
2.25.1



[PATCH 4/4] drm/amd/display: Fix __nedf2 undefined for 32 bit compilation

2022-06-30 Thread Rodrigo Siqueira
When we tried to compile DCN32/321 for 32-bit architecture, we got this
error message:

ERROR: modpost: "__nedf2" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!

This commit fixes this issue by rewriting a small part of the
dcn32_build_wm_range_table.

Cc: Aurabindo Pillai 
Cc: Harry Wentland 
Cc: Alex Deucher 
Cc: Randy Dunlap 
Fixes: 9b79abf79c414 ("drm/amd/display: add CLKMGR changes for DCN32/321")
Signed-off-by: Rodrigo Siqueira 
---
 .../drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c  | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index b49a4e34d39b..1edb5aab8990 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -155,10 +155,14 @@ static void dcn32_build_wm_range_table(struct 
clk_mgr_internal *clk_mgr)
uint16_t min_uclk_mhz   = 
clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz;
uint16_t min_dcfclk_mhz = 
clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz;
uint16_t setb_min_uclk_mhz  = min_uclk_mhz;
-   uint16_t setb_min_dcfclk_mhz= min_dcfclk_mhz;
+   uint16_t dcfclk_mhz_for_the_second_state = 
clk_mgr->base.ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz;
+
/* For Set B ranges use min clocks state 2 when available, and report 
those to PM FW */
-   if (clk_mgr->base.ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz)
-   setb_min_dcfclk_mhz = 
clk_mgr->base.ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz;
+   if (dcfclk_mhz_for_the_second_state)
+   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = 
dcfclk_mhz_for_the_second_state;
+   else
+   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = 
clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz;
+
if (clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz)
setb_min_uclk_mhz = 
clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz;
 
@@ -181,7 +185,6 @@ static void dcn32_build_wm_range_table(struct 
clk_mgr_internal *clk_mgr)

clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = 
sr_exit_time_us;

clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us
 = sr_enter_plus_exit_time_us;

clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = 
WATERMARKS_CLOCK_RANGE;
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = 
setb_min_dcfclk_mhz;

clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 
0x;

clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = 
setb_min_uclk_mhz;

clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 
0x;
-- 
2.25.1



[PATCH 3/4] drm/amd/display: Fix __muldf3 undefined for 32 bit compilation

2022-06-30 Thread Rodrigo Siqueira
Sometimes when trying to enable some feature, we have to define some
values with educated guesses, but we mark those values as TBD, which
means "To Be Determined". However, the correct way to approach it is by
loading that information from the firmware. Anyway, some of the values
that we were experimenting with caused this issue:

ERROR: modpost: "__muldf3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!

This commit fixes this issue by removing the division by two since it is
harmless in this case.

Cc: Aurabindo Pillai 
Cc: Harry Wentland 
Cc: Alex Deucher 
Cc: Randy Dunlap 
Fixes: 9b79abf79c414 ("drm/amd/display: add CLKMGR changes for DCN32/321")
Signed-off-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index 4e8059f20007..b49a4e34d39b 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -213,8 +213,8 @@ static void dcn32_build_wm_range_table(struct 
clk_mgr_internal *clk_mgr)
clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true;

clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us 
= clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us;

clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us
 = fclk_change_latency_us;
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 
sr_exit_time_us / 2; // TBD
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us
 = sr_enter_plus_exit_time_us / 2; // TBD
+   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 
sr_exit_time_us; // TBD
+   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us
 = sr_enter_plus_exit_time_us; // TBD

clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = 
WATERMARKS_MALL;

clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 
min_dcfclk_mhz;

clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 
0x;
-- 
2.25.1



[PATCH 0/4] Fix compilation issues when using i386

2022-06-30 Thread Rodrigo Siqueira
Fix compilation issues when using i386

We recently got feedback from Randy about issues in the x86-32
compilation. I was able to reproduce a very similar issue by using:

- gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0
- make -j16 ARCH=i386
- amd-staging-drm-next

I'm able to see these issues:

ERROR: modpost: "__nedf2" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined! 
ERROR: modpost: "__muldf3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined! 
ERROR: modpost: "__floatunsidf" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] 
undefined! 
ERROR: modpost: "__umoddi3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!

This patchset addresses each compilation issue in its own patch since I
want to have an easy way to bisect display code in case of regressions.
Please, let me know if you see any issue with these changes and if it
fixes the 32-bit compilation. If you still see compilation issues,
please, report:

- GCC version
- Config file
- Branch

Thanks
Siqueira

Rodrigo Siqueira (4):
  drm/amd/display: Fix __umoddi3 undefined for 32 bit compilation
  drm/amd/display: Fix __floatunsidf undefined for 32 bit compilation
  drm/amd/display: Fix __muldf3 undefined for 32 bit compilation
  drm/amd/display: Fix __nedf2 undefined for 32 bit compilation

 .../amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c  | 15 +--
 .../gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c|  4 +++-
 drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h   |  2 +-
 3 files changed, 13 insertions(+), 8 deletions(-)

-- 
2.25.1



[PATCH 1/4] drm/amd/display: Fix __umoddi3 undefined for 32 bit compilation

2022-06-30 Thread Rodrigo Siqueira
While we tried to build amdgpu on i386, we got this error:

ERROR: modpost: "__umoddi3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!

This commit fixes this issue by replacing the standard modulo operator
with div_u64_rem.

Cc: Aurabindo Pillai 
Cc: Harry Wentland 
Cc: Alex Deucher 
Cc: Randy Dunlap 
Fixes: 9b0e0d433f74 ("drm/amd/display: Add dependant changes for DCN32/321")
Reported-by: Stephen Rothwell 
Signed-off-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
index a10ec5919194..790aa2b3952c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
@@ -208,12 +208,14 @@ static uint32_t dcn32_cache_lines_for_surface(struct dc 
*dc, uint32_t surface_si
uint32_t num_cached_bytes = 0;
uint32_t remaining_size = 0;
uint32_t cache_line_size = dc->caps.cache_line_size;
+   uint32_t remainder = 0;
 
/* 1. Calculate surface size minus the number of bytes stored
 * in the first cache line (all bytes in first cache line might
 * not be fully used).
 */
-   num_cached_bytes = cache_line_size - (start_address % cache_line_size);
+   div_u64_rem(start_address, cache_line_size, &remainder);
+   num_cached_bytes = cache_line_size - remainder;
remaining_size = surface_size - num_cached_bytes;
 
/* 2. Calculate number of cache lines that will be fully used with
-- 
2.25.1



Re: [PATCH 4/5] ASoC: amd: add Machine driver for Jadeite platform

2022-06-30 Thread Mukunda,Vijendar
On 6/30/22 4:40 PM, Mark Brown wrote:
> On Thu, Jun 30, 2022 at 08:47:54AM +0530, Vijendar Mukunda wrote:
> 
>> +static int st_es8336_hw_params(struct snd_pcm_substream *substream,
>> +   struct snd_pcm_hw_params *params)
>> +{
>> +int ret = 0;
>> +struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
>> +struct snd_soc_dai *codec_dai = asoc_rtd_to_codec(rtd, 0);
>> +
>> +ret = snd_soc_dai_set_sysclk(codec_dai, 0, params_rate(params) * 256, 
>> SND_SOC_CLOCK_IN);
>> +if (ret < 0) {
>> +dev_err(rtd->dev, "can't set codec sysclk: %d\n", ret);
>> +return ret;
>> +}
>> +return ret;
>> +}
> 
>> +static const unsigned int st_channels[] = {
>> +DUAL_CHANNEL,
>> +};
>> +
>> +static const unsigned int st_rates[] = {
>> +48000,
>> +};
> 
> If the clock rate is fixed why not just set the sysclk once at startup
> too?
Yes. We can set sysclk once at the startup as clock rate is fixed.
Will modify the code and post the new patch.



Re: [PATCH 5/5] ASoC: amd: enable machine driver build for Jadeite platform

2022-06-30 Thread Mukunda,Vijendar
On 6/30/22 4:41 PM, Mark Brown wrote:
> On Thu, Jun 30, 2022 at 08:47:55AM +0530, Vijendar Mukunda wrote:
> 
>> +depends on SND_SOC_AMD_ACP && I2C && ACPI
> 
> The code treated ACPI as optional so you could relax the ACPI dependency
> to be "ACPI || COMPILE_TEST" (I think the same applies to I2C).

Will fix it and push the newer version.


Re: [PATCH] drm/amd: Add debug mask for subviewport mclk switch

2022-06-30 Thread Alex Deucher
On Tue, Jun 28, 2022 at 5:26 PM Aurabindo Pillai
 wrote:
>
> [Why]
> Expose a new debugfs enum to force a subviewport memory clock switch
> to facilitate easy testing.
>

Is the debugfs support already plumbed in and this just enables you to
use it?  If it's in debugfs, do we really need a module parameter to
enable it or can it just be enabled by default and used via debugfs
when needed?

Alex

> Signed-off-by: Aurabindo Pillai 
> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++
>  drivers/gpu/drm/amd/include/amd_shared.h  | 1 +
>  2 files changed, 4 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index c9145864ed2b..7a034ca95be2 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -1559,6 +1559,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
> if (amdgpu_dc_debug_mask & DC_DISABLE_CLOCK_GATING)
> adev->dm.dc->debug.disable_clock_gate = true;
>
> +   if (amdgpu_dc_debug_mask & DC_FORCE_SUBVP_MCLK_SWITCH)
> +   adev->dm.dc->debug.force_subvp_mclk_switch = true;
> +
> r = dm_dmub_hw_init(adev);
> if (r) {
> DRM_ERROR("DMUB interface failed to initialize: status=%d\n", 
> r);
> diff --git a/drivers/gpu/drm/amd/include/amd_shared.h 
> b/drivers/gpu/drm/amd/include/amd_shared.h
> index bcdf7453a403..b1c55dd7b498 100644
> --- a/drivers/gpu/drm/amd/include/amd_shared.h
> +++ b/drivers/gpu/drm/amd/include/amd_shared.h
> @@ -247,6 +247,7 @@ enum DC_DEBUG_MASK {
> DC_DISABLE_DSC = 0x4,
> DC_DISABLE_CLOCK_GATING = 0x8,
> DC_DISABLE_PSR = 0x10,
> +   DC_FORCE_SUBVP_MCLK_SWITCH = 0x20,
>  };
>
>  enum amd_dpm_forced_level;
> --
> 2.36.1
>


Re: [PATCH] drm/amd: Add debug mask for subviewport mclk switch

2022-06-30 Thread Aurabindo Pillai

Ping

On 2022-06-28 17:26, Aurabindo Pillai wrote:

[Why]
Expose a new debugfs enum to force a subviewport memory clock switch
to facilitate easy testing.

Signed-off-by: Aurabindo Pillai 
---
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++
  drivers/gpu/drm/amd/include/amd_shared.h  | 1 +
  2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index c9145864ed2b..7a034ca95be2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1559,6 +1559,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
if (amdgpu_dc_debug_mask & DC_DISABLE_CLOCK_GATING)
adev->dm.dc->debug.disable_clock_gate = true;
  
+	if (amdgpu_dc_debug_mask & DC_FORCE_SUBVP_MCLK_SWITCH)

+   adev->dm.dc->debug.force_subvp_mclk_switch = true;
+
r = dm_dmub_hw_init(adev);
if (r) {
DRM_ERROR("DMUB interface failed to initialize: status=%d\n", 
r);
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h 
b/drivers/gpu/drm/amd/include/amd_shared.h
index bcdf7453a403..b1c55dd7b498 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -247,6 +247,7 @@ enum DC_DEBUG_MASK {
DC_DISABLE_DSC = 0x4,
DC_DISABLE_CLOCK_GATING = 0x8,
DC_DISABLE_PSR = 0x10,
+   DC_FORCE_SUBVP_MCLK_SWITCH = 0x20,
  };
  
  enum amd_dpm_forced_level;


Re: [PATCH 2/2] drm/amdkfd: change svm range evict

2022-06-30 Thread Felix Kuehling

On 2022-06-30 11:19, Eric Huang wrote:


On 2022-06-29 19:29, Felix Kuehling wrote:

On 2022-06-29 18:53, Eric Huang wrote:



On 2022-06-29 18:20, Felix Kuehling wrote:

On 2022-06-28 17:43, Eric Huang wrote:

Two changes:
1. reducing unnecessary evict/unmap when range is not mapped to gpu.
2. adding always evict when flags is set to always_mapped.

Signed-off-by: Eric Huang 
---
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 10 --
  1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

index 4bf2f75f853b..76e817687ef9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1767,12 +1767,16 @@ svm_range_evict(struct svm_range *prange, 
struct mm_struct *mm,

  struct kfd_process *p;
  int r = 0;
  +    if (!prange->mapped_to_gpu)
+    return 0;


This feels like an unrelated optimization that should be in a 
separate patch.


But I'm not sure this is correct, because it doesn't consider child 
ranges. svm_range_unmap_from_gpus already contains this check, so 
ranges should not be unmapped unnecessarily either way. Is there 
any other benefit to this change that I'm missing?

I will send another patch separately that considers child ranges.


I think this should only be done in the XNACK-off case. For XNACK-on 
it's already handled in the svm_range_unmap_from_gpus.

Yes, and it is also done when KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED is set.



The benefit is it will reduce unnecessary queue evicts when 
allocating context save memory, which is unmapped to gpu.


That sounds wrong. The context save area should never be unmapped 
from GPU. That's the whole point of setting the 
KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED flag. I guess this is happening 
while migrating the context save area to VRAM for the first time, 
even before it's mapped to GPU?
Yes. It happens the first time, when registering the svm range and migrating 
to VRAM are done together; at this moment, the range is not mapped to 
GPU.


I think there may be another eviction, when the CWSR header is 
initialized by the CPU. That would also migrate it back to system 
memory. To avoid that, you should probably register the context save 
area only after the header has been initialized.

Yes. I am using this way. Please look at patch 4/4.


I think avoiding an eviction when memory is migrated when it is first 
registered is worthwhile, not just for CWSR.



It is for efficiency reason. On the other hand, without this 
optimization KFDCWSRTest.InterruptRestore fails with queue 
preemption error.


What do you mean by "queue preemption error"? Does HWS hang?
HWS doesn't hang immediately, so there is no fence timeout error 
"The cp might be in an unrecoverable state due to an unsuccessful 
queues preemption". The error is "HIQ MQD's queue_doorbell_id0 is not 
0, Queue preemption time out" after checking mqd manager, which means 
HWS abandons unmap queue request without returning timeout error to 
driver. And after this error, the following test will fail at queue 
creation as HWS hangs


OK, that sounds like the kind of bug the InterruptRestore test is meant 
to catch. I think you just created a better test by causing more 
preemptions. ;)


So we should do two things:

 * Avoid unnecessary preemptions in KFD
 * Improve the test to reproduce this hang even without unnecessary
   preemptions in KFD, so we can investigate the issue

Regards,
  Felix





I think the reason is the extra queue evicts make HWS too busy to 
preempt existing queues. There is one unmap_queue packet sent to HWS 
in current code, and will be three unmap_queue packets with unified 
memory allocation.


When queues of a process are already evicted, they should not get 
evicted again. That's handled by the qpd->evicted counter. There 
should never be multiple unmap_queues packets in flight at the same 
time. If you're seeing three unmap_queues, you should also see queues 
restored three times.


HWS should never be too busy to evict queues. If you're seeing 
preemptions fail, you may have found a bug.
The restore delay worker will do something different in terms of 
timing. It could restore queues before the next unmap_queues, so the 
situation is too complicated to debug in a multiple-queue evict/restore 
environment. The error definitely means there is a bug, from driver 
point of view there is nothing wrong even adding extra queue eviction, 
so I try to avoid extra queue eviction and keep it as before. The 
bottom line is to make sure unified svm range for context save area 
doesn't cause any failure in kfdtest, so I can theoretically assume 
extra queue eviction/restoring causes HWS failure.


Regards,
Eric


Regards,
  Felix



So this optimization will keep only one unmap_queue as before.

Regards,
Eric


Regards,
  Felix



+
  p = container_of(svms, struct kfd_process, svms);
    pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] 

Re: [PATCH 2/2] drm/amdkfd: change svm range evict

2022-06-30 Thread Eric Huang



On 2022-06-29 19:29, Felix Kuehling wrote:

On 2022-06-29 18:53, Eric Huang wrote:



On 2022-06-29 18:20, Felix Kuehling wrote:

On 2022-06-28 17:43, Eric Huang wrote:

Two changes:
1. reducing unnecessary evict/unmap when range is not mapped to gpu.
2. adding always evict when flags is set to always_mapped.

Signed-off-by: Eric Huang 
---
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 10 --
  1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

index 4bf2f75f853b..76e817687ef9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1767,12 +1767,16 @@ svm_range_evict(struct svm_range *prange, 
struct mm_struct *mm,

  struct kfd_process *p;
  int r = 0;
  +    if (!prange->mapped_to_gpu)
+    return 0;


This feels like an unrelated optimization that should be in a 
separate patch.


But I'm not sure this is correct, because it doesn't consider child 
ranges. svm_range_unmap_from_gpus already contains this check, so 
ranges should not be unmapped unnecessarily either way. Is there any 
other benefit to this change that I'm missing?

I will send another patch separately that considers child ranges.


I think this should only be done in the XNACK-off case. For XNACK-on 
it's already handled in the svm_range_unmap_from_gpus.

Yes, and it is also done when KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED is set.



The benefit is it will reduce unnecessary queue evicts when 
allocating context save memory, which is unmapped to gpu.


That sounds wrong. The context save area should never be unmapped from 
GPU. That's the whole point of setting the 
KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED flag. I guess this is happening 
while migrating the context save area to VRAM for the first time, even 
before it's mapped to GPU?
Yes. It happens the first time, when registering the svm range and migrating 
to VRAM are done together; at this moment, the range is not mapped to GPU.


I think there may be another eviction, when the CWSR header is 
initialized by the CPU. That would also migrate it back to system 
memory. To avoid that, you should probably register the context save 
area only after the header has been initialized.

Yes. I am using this way. Please look at patch 4/4.


I think avoiding an eviction when memory is migrated when it is first 
registered is worthwhile, not just for CWSR.



It is for efficiency reason. On the other hand, without this 
optimization KFDCWSRTest.InterruptRestore fails with queue preemption 
error.


What do you mean by "queue preemption error"? Does HWS hang?
HWS doesn't hang immediately, so there is no fence timeout error 
"The cp might be in an unrecoverable state due to an unsuccessful queues 
preemption". The error is "HIQ MQD's queue_doorbell_id0 is not 0, Queue 
preemption time out" after checking mqd manager, which means HWS 
abandons unmap queue request without returning timeout error to driver. 
And after this error, the following test will fail at queue creation as 
HWS hangs



I think the reason is the extra queue evicts make HWS too busy to 
preempt existing queues. There is one unmap_queue packet sent to HWS 
in current code, and will be three unmap_queue packets with unified 
memory allocation.


When queues of a process are already evicted, they should not get 
evicted again. That's handled by the qpd->evicted counter. There 
should never be multiple unmap_queues packets in flight at the same 
time. If you're seeing three unmap_queues, you should also see queues 
restored three times.


HWS should never be too busy to evict queues. If you're seeing 
preemptions fail, you may have found a bug.
The restore delay worker will do something different in terms of 
timing. It could restore queues before the next unmap_queues, so the 
situation is too complicated to debug in a multiple-queue evict/restore 
environment. The error definitely means there is a bug, from driver 
point of view there is nothing wrong even adding extra queue eviction, 
so I try to avoid extra queue eviction and keep it as before. The bottom 
line is to make sure unified svm range for context save area doesn't 
cause any failure in kfdtest, so I can theoretically assume extra queue 
eviction/restoring causes HWS failure.


Regards,
Eric


Regards,
  Felix



So this optimization will keep only one unmap_queue as before.

Regards,
Eric


Regards,
  Felix



+
  p = container_of(svms, struct kfd_process, svms);
    pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 
0x%lx]\n",

   svms, prange->start, prange->last, start, last);
  -    if (!p->xnack_enabled) {
+    if (!p->xnack_enabled ||
+    (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) {
  int evicted_ranges;
    list_for_each_entry(pchild, >child_list, 
child_list) {
@@ -3321,7 +3325,9 @@ svm_range_set_attr(struct kfd_process *p, 
struct mm_struct *mm,

  if (r)
 

Re: [PATCH v5 3/11] drm/amdkfd: Add GPU recoverable fault SMI event

2022-06-30 Thread philip yang

  


On 2022-06-30 10:19, Felix Kuehling
  wrote:


  
  Am 2022-06-28 um 10:50 schrieb Philip Yang:
  
  Use ktime_get_boottime_ns() as timestamp
to correlate with other

APIs. Output timestamp when GPU recoverable fault starts and
ends to

recover the fault, if migration happened or only GPU page table
is

updated to recover, fault address, if read or write fault.


Signed-off-by: Philip Yang 

---

  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 17
+

  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h |  6 +-

  drivers/gpu/drm/amd/amdkfd/kfd_svm.c    | 17
+

  drivers/gpu/drm/amd/amdkfd/kfd_svm.h    |  2 +-

  4 files changed, 36 insertions(+), 6 deletions(-)


diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c

index 55ed026435e2..b7e68283925f 100644

--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c

+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c

@@ -244,6 +244,23 @@ void kfd_smi_event_update_vmfault(struct
kfd_dev *dev, uint16_t pasid)

    task_info.pid, task_info.task_name);

  }

  +void kfd_smi_event_page_fault_start(struct kfd_dev *dev,
pid_t pid,

+    unsigned long address, bool write_fault,

+    ktime_t ts)

+{

+    kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_START,

+  "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid,

+  address, dev->id, write_fault ? 'W' : 'R');

+}

+

+void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t
pid,

+  unsigned long address, bool migration)

+{

+    kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_END,

+  "%lld -%d @%lx(%x) %c\n",
ktime_get_boottime_ns(),

+  pid, address, dev->id, migration ? 'M' : 'U');

+}

+

  int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)

  {

  struct kfd_smi_client *client;

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h

index dfe101c21166..7903718cd9eb 100644

--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h

+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h

@@ -29,5 +29,9 @@ void kfd_smi_event_update_vmfault(struct
kfd_dev *dev, uint16_t pasid);

  void kfd_smi_event_update_thermal_throttling(struct kfd_dev
*dev,

   uint64_t throttle_bitmask);

  void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool
post_reset);

-

+void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t
pid,

+    unsigned long address, bool write_fault,

+    ktime_t ts);

+void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t
pid,

+  unsigned long address, bool migration);

  #endif

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

index d6fc00d51c8c..2ad08a1f38dd 100644

--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

@@ -32,6 +32,7 @@

  #include "kfd_priv.h"

  #include "kfd_svm.h"

  #include "kfd_migrate.h"

+#include "kfd_smi_events.h"

    #ifdef dev_fmt

  #undef dev_fmt

@@ -1617,7 +1618,7 @@ static int
svm_range_validate_and_map(struct mm_struct *mm,

  svm_range_unreserve_bos();

    if (!r)

-    prange->validate_timestamp =
ktime_to_us(ktime_get());

+    prange->validate_timestamp = ktime_get_boottime();

    return r;

  }

@@ -2694,11 +2695,12 @@ svm_range_restore_pages(struct
amdgpu_device *adev, unsigned int pasid,

  struct svm_range_list *svms;

  struct svm_range *prange;

 

Re: [PATCH v5 1/11] drm/amdkfd: Add KFD SMI event IDs and triggers

2022-06-30 Thread Felix Kuehling

Am 2022-06-28 um 10:50 schrieb Philip Yang:

Define new system management interface event IDs for migration, GPU
recoverable page fault, user queues eviction, restore and unmap from
GPU events and corresponding event triggers, those will be implemented
in the following patches.

Signed-off-by: Philip Yang 


Reviewed-by: Felix Kuehling 



---
  include/uapi/linux/kfd_ioctl.h | 37 ++
  1 file changed, 37 insertions(+)

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index c648ed7c5ff1..f239e260796b 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -468,6 +468,43 @@ enum kfd_smi_event {
KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
KFD_SMI_EVENT_GPU_PRE_RESET = 3,
KFD_SMI_EVENT_GPU_POST_RESET = 4,
+   KFD_SMI_EVENT_MIGRATE_START = 5,
+   KFD_SMI_EVENT_MIGRATE_END = 6,
+   KFD_SMI_EVENT_PAGE_FAULT_START = 7,
+   KFD_SMI_EVENT_PAGE_FAULT_END = 8,
+   KFD_SMI_EVENT_QUEUE_EVICTION = 9,
+   KFD_SMI_EVENT_QUEUE_RESTORE = 10,
+   KFD_SMI_EVENT_UNMAP_FROM_GPU = 11,
+
+   /*
+* max event number, as a flag bit to get events from all processes,
+* this requires super user permission, otherwise will not be able to
+* receive event from any process. Without this flag to receive events
+* from same process.
+*/
+   KFD_SMI_EVENT_ALL_PROCESS = 64
+};
+
+enum KFD_MIGRATE_TRIGGERS {
+   KFD_MIGRATE_TRIGGER_PREFETCH,
+   KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
+   KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU,
+   KFD_MIGRATE_TRIGGER_TTM_EVICTION
+};
+
+enum KFD_QUEUE_EVICTION_TRIGGERS {
+   KFD_QUEUE_EVICTION_TRIGGER_SVM,
+   KFD_QUEUE_EVICTION_TRIGGER_USERPTR,
+   KFD_QUEUE_EVICTION_TRIGGER_TTM,
+   KFD_QUEUE_EVICTION_TRIGGER_SUSPEND,
+   KFD_QUEUE_EVICTION_CRIU_CHECKPOINT,
+   KFD_QUEUE_EVICTION_CRIU_RESTORE
+};
+
+enum KFD_SVM_UNMAP_TRIGGERS {
+   KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY,
+   KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE,
+   KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU
  };
  
  #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))


Re: [PATCH v5 8/11] drm/amdkfd: Bump KFD API version for SMI profiling event

2022-06-30 Thread Felix Kuehling

Am 2022-06-28 um 10:50 schrieb Philip Yang:

Indicate SMI profiling events available.

Signed-off-by: Philip Yang 


Reviewed-by: Felix Kuehling 



---
  include/uapi/linux/kfd_ioctl.h | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index f239e260796b..b024e8ba865d 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -35,9 +35,10 @@
   * - 1.7 - Checkpoint Restore (CRIU) API
   * - 1.8 - CRIU - Support for SDMA transfers with GTT BOs
   * - 1.9 - Add available memory ioctl
+ * - 1.10 - Add SMI profiler event log
   */
  #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 9
+#define KFD_IOCTL_MINOR_VERSION 10
  
  struct kfd_ioctl_get_version_args {

__u32 major_version;/* from KFD */


Re: [PATCH v5 7/11] drm/amdkfd: Asynchronously free smi_client

2022-06-30 Thread Felix Kuehling

Am 2022-06-28 um 10:50 schrieb Philip Yang:

The synchronize_rcu may take several ms, which noticeably slows down
applications closing the SMI event handle. Use call_rcu to free client->fifo
and client asynchronously and eliminate the synchronize_rcu call in the
user thread.

Signed-off-by: Philip Yang 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 14 ++
  1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index e5896b7a16dd..0472b56de245 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -38,6 +38,7 @@ struct kfd_smi_client {
uint64_t events;
struct kfd_dev *dev;
spinlock_t lock;
+   struct rcu_head rcu;
pid_t pid;
bool suser;
  };
@@ -137,6 +138,14 @@ static ssize_t kfd_smi_ev_write(struct file *filep, const 
char __user *user,
return sizeof(events);
  }
  
+static void kfd_smi_ev_client_free(struct rcu_head *p)

+{
+   struct kfd_smi_client *ev = container_of(p, struct kfd_smi_client, rcu);
+
+   kfifo_free(&ev->fifo);
+   kfree(ev);
+}
+
  static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
  {
struct kfd_smi_client *client = filep->private_data;
@@ -146,10 +155,7 @@ static int kfd_smi_ev_release(struct inode *inode, struct 
file *filep)
list_del_rcu(>list);
spin_unlock(>smi_lock);
  
-	synchronize_rcu();

-   kfifo_free(>fifo);
-   kfree(client);
-
+   call_rcu(&client->rcu, kfd_smi_ev_client_free);
return 0;
  }
  


Re: [PATCH v5 6/11] drm/amdkfd: Add unmap from GPU SMI event

2022-06-30 Thread Felix Kuehling

Am 2022-06-28 um 10:50 schrieb Philip Yang:

An SVM range is unmapped from GPUs when the range is unmapped from the CPU,
or, with xnack on, from the MMU notifier when the range is evicted or migrated.

Signed-off-by: Philip Yang 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c |  9 
  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h |  3 +++
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c| 25 +++--
  3 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 3917c38204d0..e5896b7a16dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -318,6 +318,15 @@ void kfd_smi_event_queue_restore_rescheduled(struct 
mm_struct *mm)
kfd_unref_process(p);
  }
  
+void kfd_smi_event_unmap_from_gpu(struct kfd_dev *dev, pid_t pid,

+ unsigned long address, unsigned long last,
+ uint32_t trigger)
+{
+   kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_UNMAP_FROM_GPU,
+ "%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(),
+ pid, address, last - address + 1, dev->id, trigger);
+}
+
  int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
  {
struct kfd_smi_client *client;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index b23292637239..76fe4e0ec2d2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -46,4 +46,7 @@ void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t 
pid,
  uint32_t trigger);
  void kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid);
  void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm);
+void kfd_smi_event_unmap_from_gpu(struct kfd_dev *dev, pid_t pid,
+ unsigned long address, unsigned long last,
+ uint32_t trigger);
  #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index ddc1e4651919..bf888ae84c92 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1200,7 +1200,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
  
  static int

  svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
- unsigned long last)
+ unsigned long last, uint32_t trigger)
  {
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
struct kfd_process_device *pdd;
@@ -1232,6 +1232,9 @@ svm_range_unmap_from_gpus(struct svm_range *prange, 
unsigned long start,
return -EINVAL;
}
  
+		kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid,

+start, last, trigger);
+
r = svm_range_unmap_from_gpu(pdd->dev->adev,
 drm_priv_to_vm(pdd->drm_priv),
 start, last, );
@@ -1759,7 +1762,8 @@ static void svm_range_restore_work(struct work_struct 
*work)
   */
  static int
  svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
-   unsigned long start, unsigned long last)
+   unsigned long start, unsigned long last,
+   enum mmu_notifier_event event)
  {
struct svm_range_list *svms = prange->svms;
struct svm_range *pchild;
@@ -1804,6 +1808,12 @@ svm_range_evict(struct svm_range *prange, struct 
mm_struct *mm,
msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
} else {
unsigned long s, l;
+   uint32_t trigger;
+
+   if (event == MMU_NOTIFY_MIGRATE)
+   trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE;
+   else
+   trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY;
  
  		pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",

 prange->svms, start, last);
@@ -1812,13 +1822,13 @@ svm_range_evict(struct svm_range *prange, struct 
mm_struct *mm,
s = max(start, pchild->start);
l = min(last, pchild->last);
if (l >= s)
-   svm_range_unmap_from_gpus(pchild, s, l);
+   svm_range_unmap_from_gpus(pchild, s, l, 
trigger);
mutex_unlock(>lock);
}
s = max(start, prange->start);
l = min(last, prange->last);
if (l >= s)
-   svm_range_unmap_from_gpus(prange, s, l);
+   svm_range_unmap_from_gpus(prange, s, l, trigger);
}
  
  	return r;

@@ -2232,6 +2242,7 @@ static 

Re: [PATCH v5 5/11] drm/amdkfd: Add user queue eviction restore SMI event

2022-06-30 Thread Felix Kuehling

Am 2022-06-28 um 10:50 schrieb Philip Yang:

Output user queue eviction and restore event. User queue eviction may be
triggered by svm or userptr MMU notifier, TTM eviction, device suspend
and CRIU checkpoint and restore.

User queue restore may be rescheduled if eviction happens again during
restore.

Signed-off-by: Philip Yang 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  2 +-
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 12 ---
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  4 +--
  drivers/gpu/drm/amd/amdkfd/kfd_device.c   |  4 +--
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +-
  drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 15 ++--
  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c   | 35 +++
  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h   |  4 +++
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c  |  6 ++--
  9 files changed, 69 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index b25b41f50213..73bf8b5f2aa9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -336,7 +336,7 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
  }
  #endif
  /* KGD2KFD callbacks */
-int kgd2kfd_quiesce_mm(struct mm_struct *mm);
+int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger);
  int kgd2kfd_resume_mm(struct mm_struct *mm);
  int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
struct dma_fence *fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 5ba9070d8722..6a7e045ddcc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -32,6 +32,7 @@
  #include "amdgpu_dma_buf.h"
  #include 
  #include "amdgpu_xgmi.h"
+#include "kfd_smi_events.h"
  
  /* Userptr restore delay, just long enough to allow consecutive VM

   * changes to accumulate
@@ -2381,7 +2382,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
evicted_bos = atomic_inc_return(_info->evicted_bos);
if (evicted_bos == 1) {
/* First eviction, stop the queues */
-   r = kgd2kfd_quiesce_mm(mm);
+   r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
if (r)
pr_err("Failed to quiesce KFD\n");
schedule_delayed_work(_info->restore_userptr_work,
@@ -2655,13 +2656,16 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct 
work_struct *work)
  
  unlock_out:

mutex_unlock(_info->lock);
-   mmput(mm);
-   put_task_struct(usertask);
  
  	/* If validation failed, reschedule another attempt */

-   if (evicted_bos)
+   if (evicted_bos) {
schedule_delayed_work(_info->restore_userptr_work,
msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+
+   kfd_smi_event_queue_restore_rescheduled(mm);
+   }
+   mmput(mm);
+   put_task_struct(usertask);
  }
  
  /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index a0246b4bae6b..6abfe10229a2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2428,7 +2428,7 @@ static int criu_restore(struct file *filep,
 * Set the process to evicted state to avoid running any new queues 
before all the memory
 * mappings are ready.
 */
-   ret = kfd_process_evict_queues(p);
+   ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE);
if (ret)
goto exit_unlock;
  
@@ -2547,7 +2547,7 @@ static int criu_process_info(struct file *filep,

goto err_unlock;
}
  
-	ret = kfd_process_evict_queues(p);

+   ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT);
if (ret)
goto err_unlock;
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c

index c8fee0dbfdcb..6ec0e9f0927d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -837,7 +837,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void 
*ih_ring_entry)
spin_unlock_irqrestore(>interrupt_lock, flags);
  }
  
-int kgd2kfd_quiesce_mm(struct mm_struct *mm)

+int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger)
  {
struct kfd_process *p;
int r;
@@ -851,7 +851,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm)
return -ESRCH;
  
  	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);

-   r = kfd_process_evict_queues(p);
+   r = kfd_process_evict_queues(p, trigger);
  
  	

Re: [PATCH v5 4/11] drm/amdkfd: Add migration SMI event

2022-06-30 Thread Felix Kuehling

Am 2022-06-28 um 10:50 schrieb Philip Yang:

For migration start and end events, output the timestamp when migration
starts and ends, the svm range address and size, the GPU id of the migration
source and destination, and the svm range attributes.

Migration trigger could be prefetch, CPU or GPU page fault and TTM
eviction.

Signed-off-by: Philip Yang 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c| 53 -
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h|  5 +-
  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 22 +
  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h |  8 
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c| 16 ---
  5 files changed, 83 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index fb8a94e52656..9667015a6cbc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -32,6 +32,7 @@
  #include "kfd_priv.h"
  #include "kfd_svm.h"
  #include "kfd_migrate.h"
+#include "kfd_smi_events.h"
  
  #ifdef dev_fmt

  #undef dev_fmt
@@ -402,8 +403,9 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
  static long
  svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start,
-   uint64_t end)
+   uint64_t end, uint32_t trigger)
  {
+   struct kfd_process *p = container_of(prange->svms, struct kfd_process, 
svms);
uint64_t npages = (end - start) >> PAGE_SHIFT;
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
@@ -430,6 +432,11 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
migrate.dst = migrate.src + npages;
scratch = (dma_addr_t *)(migrate.dst + npages);
  
+	kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,

+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ 0, adev->kfd.dev->id, 
prange->prefetch_loc,
+ prange->preferred_loc, trigger);
+
r = migrate_vma_setup();
if (r) {
dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 
0x%lx]\n",
@@ -458,6 +465,10 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize();
  
+	kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,

+   start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+   0, adev->kfd.dev->id, trigger);
+
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
svm_range_free_dma_mappings(prange);
  
@@ -479,6 +490,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,

   * @prange: range structure
   * @best_loc: the device to migrate to
   * @mm: the process mm structure
+ * @trigger: reason of migration
   *
   * Context: Process context, caller hold mmap read lock, svms lock, prange 
lock
   *
@@ -487,7 +499,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
   */
  static int
  svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
-   struct mm_struct *mm)
+   struct mm_struct *mm, uint32_t trigger)
  {
unsigned long addr, start, end;
struct vm_area_struct *vma;
@@ -524,7 +536,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
break;
  
  		next = min(vma->vm_end, end);

-   r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
+   r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, 
trigger);
if (r < 0) {
pr_debug("failed %ld to migrate\n", r);
break;
@@ -655,8 +667,10 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct 
svm_range *prange,
   */
  static long
  svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
-  struct vm_area_struct *vma, uint64_t start, uint64_t end)
+  struct vm_area_struct *vma, uint64_t start, uint64_t end,
+  uint32_t trigger)
  {
+   struct kfd_process *p = container_of(prange->svms, struct kfd_process, 
svms);
uint64_t npages = (end - start) >> PAGE_SHIFT;
unsigned long upages = npages;
unsigned long cpages = 0;
@@ -685,6 +699,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct 
svm_range *prange,
migrate.dst = migrate.src + npages;
scratch = (dma_addr_t *)(migrate.dst + npages);
  
+	kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,

+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+

Re: [PATCH v5 3/11] drm/amdkfd: Add GPU recoverable fault SMI event

2022-06-30 Thread Felix Kuehling



Am 2022-06-28 um 10:50 schrieb Philip Yang:

Use ktime_get_boottime_ns() as timestamp to correlate with other
APIs. Output the timestamp when recovery of a GPU recoverable fault starts
and ends, whether migration happened or only the GPU page table was
updated to recover, the fault address, and whether it was a read or write fault.

Signed-off-by: Philip Yang 
---
  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 17 +
  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h |  6 +-
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c| 17 +
  drivers/gpu/drm/amd/amdkfd/kfd_svm.h|  2 +-
  4 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 55ed026435e2..b7e68283925f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -244,6 +244,23 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, 
uint16_t pasid)
  task_info.pid, task_info.task_name);
  }
  
+void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid,

+   unsigned long address, bool write_fault,
+   ktime_t ts)
+{
+   kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_START,
+ "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid,
+ address, dev->id, write_fault ? 'W' : 'R');
+}
+
+void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
+ unsigned long address, bool migration)
+{
+   kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_END,
+ "%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(),
+ pid, address, dev->id, migration ? 'M' : 'U');
+}
+
  int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
  {
struct kfd_smi_client *client;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index dfe101c21166..7903718cd9eb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -29,5 +29,9 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, 
uint16_t pasid);
  void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 uint64_t throttle_bitmask);
  void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
-
+void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid,
+   unsigned long address, bool write_fault,
+   ktime_t ts);
+void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
+ unsigned long address, bool migration);
  #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index d6fc00d51c8c..2ad08a1f38dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -32,6 +32,7 @@
  #include "kfd_priv.h"
  #include "kfd_svm.h"
  #include "kfd_migrate.h"
+#include "kfd_smi_events.h"
  
  #ifdef dev_fmt

  #undef dev_fmt
@@ -1617,7 +1618,7 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
svm_range_unreserve_bos();
  
  	if (!r)

-   prange->validate_timestamp = ktime_to_us(ktime_get());
+   prange->validate_timestamp = ktime_get_boottime();
  
  	return r;

  }
@@ -2694,11 +2695,12 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
struct svm_range_list *svms;
struct svm_range *prange;
struct kfd_process *p;
-   uint64_t timestamp;
+   ktime_t timestamp = ktime_get_boottime();
int32_t best_loc;
int32_t gpuidx = MAX_GPU_INSTANCE;
bool write_locked = false;
struct vm_area_struct *vma;
+   bool migration = false;
int r = 0;
  
  	if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {

@@ -2775,9 +2777,9 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
goto out_unlock_range;
}
  
-	timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;

/* skip duplicate vm fault on different pages of same range */
-   if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
+   if (ktime_before(timestamp, ktime_add_ns(prange->validate_timestamp,
+   AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING))) {


You changed the timestamp units from us to ns. I think you'll need to 
update AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING (multiply by 1000) to 
account for that.


Other than that, this patch is

Reviewed-by: Felix Kuehling 



pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
 svms, prange->start, prange->last);
r = 0;
@@ -2813,7 +2815,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int 

RE: [PATCH v3] drm/amdkfd: simplify vm_validate_pt_pd_bos

2022-06-30 Thread Yu, Lang
[AMD Official Use Only - General]

Ping!

Hi Felix, what do you think? Thanks!

Regards,
Lang

>-Original Message-
>From: Koenig, Christian 
>Sent: Tuesday, June 14, 2022 5:08 PM
>To: Yu, Lang ; amd-gfx@lists.freedesktop.org
>Cc: Kuehling, Felix ; Deucher, Alexander
>; Huang, Ray 
>Subject: Re: [PATCH v3] drm/amdkfd: simplify vm_validate_pt_pd_bos
>
>Am 14.06.22 um 11:03 schrieb Lang Yu:
>> We don't need to validate and map root PD specially here, it would be
>> validated and mapped by amdgpu_vm_validate_pt_bos if it is evicted.
>>
>> The special case is when turning a GFX VM to a compute VM, if
>> vm_update_mode changed, we should make sure root PD gets mapped. So
>> just map root PD after updating vm->update_funcs in
>> amdgpu_vm_make_compute whether the vm_update_mode changed or
>not.
>>
>> v3:
>>   - Add some comments suggested by Christian.
>>
>> v2:
>>   - Don't rename vm_validate_pt_pd_bos and make it public.
>>
>> Signed-off-by: Lang Yu 
>
>I can't judge the kfd part, but the VM stuff looks good to me now.
>
>Acked-by: Christian König 
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 -
>-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |  8 
>>   2 files changed, 8 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index 50bb590c3306..c9ef242177e2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -439,22 +439,8 @@ static int vm_validate_pt_pd_bos(struct
>amdgpu_vm *vm)
>>  return ret;
>>  }
>>
>> -ret = amdgpu_amdkfd_validate_vm_bo(NULL, pd);
>> -if (ret) {
>> -pr_err("failed to validate PD\n");
>> -return ret;
>> -}
>> -
>>  vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.bo);
>>
>> -if (vm->use_cpu_for_update) {
>> -ret = amdgpu_bo_kmap(pd, NULL);
>> -if (ret) {
>> -pr_err("failed to kmap PD, ret=%d\n", ret);
>> -return ret;
>> -}
>> -}
>> -
>>  return 0;
>>   }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 703552f9a6d7..3a6b827e540c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -2225,6 +2225,14 @@ int amdgpu_vm_make_compute(struct
>amdgpu_device *adev, struct amdgpu_vm *vm)
>>  } else {
>>  vm->update_funcs = _vm_sdma_funcs;
>>  }
>> +/*
>> + * Make sure root PD gets mapped. As vm_update_mode could be
>changed
>> + * when turning a GFX VM into a compute VM.
>> + */
>> +r = vm->update_funcs->map_table(to_amdgpu_bo_vm(vm-
>>root.bo));
>> +if (r)
>> +goto unreserve_bo;
>> +
>>  dma_fence_put(vm->last_update);
>>  vm->last_update = NULL;
>>  vm->is_compute_context = true;


Re: [PATCH] drm/amd/pm: update SMU 13.0.0 driver_if header

2022-06-30 Thread Deucher, Alexander
[Public]

Acked-by: Alex Deucher 

From: Quan, Evan 
Sent: Thursday, June 30, 2022 4:26 AM
To: amd-gfx@lists.freedesktop.org 
Cc: Zhang, Hawking ; Gao, Likun ; 
Deucher, Alexander ; Quan, Evan 
Subject: [PATCH] drm/amd/pm: update SMU 13.0.0 driver_if header

And bump the version to 0x2A.

Signed-off-by: Evan Quan 
Change-Id: I2b66b9a289177a979201fca2056ff11e0b81f2bb
---
 .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 3 ++-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h   | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
index 6a817c7ce110..5becfc1bb2ec 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
@@ -1041,7 +1041,8 @@ typedef struct {
   uint16_tGfxclkFreqGfxUlv; // in MHz
   uint8_t GfxIdlePadding2[2];

-  uint32_tGfxoffSpare[16];
+  uint32_tGfxOffEntryHysteresis;
+  uint32_tGfxoffSpare[15];

   // GFX GPO
   uint32_tGfxGpoSpare[16];
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 7c0ca962cf28..3f12127f9c24 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -30,7 +30,7 @@
 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x29
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2A
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2A

 #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500  //500ms
--
2.29.0



Re: [PATCH v7 04/14] mm: add device coherent vma selection for memory migration

2022-06-30 Thread Alistair Popple


David Hildenbrand  writes:

> On 29.06.22 05:54, Alex Sierra wrote:
>> This case is used to migrate pages from device memory, back to system
>> memory. Device coherent type memory is cache coherent from device and CPU
>> point of view.
>>
>> Signed-off-by: Alex Sierra 
>> Acked-by: Felix Kuehling 
>> Reviewed-by: Alistair Popple 
>> Signed-off-by: Christoph Hellwig 
>
>
> I'm not too familiar with this code, please excuse my naive questions:
>
>> @@ -148,15 +148,21 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>>  if (is_writable_device_private_entry(entry))
>>  mpfn |= MIGRATE_PFN_WRITE;
>>  } else {
>> -if (!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
>> -goto next;
>
> Why not exclude MIGRATE_VMA_SELECT_DEVICE_PRIVATE here? IIRC that would
> have happened before this change.

I might be missing something as I don't quite follow - this path is for
normal system pages so we only want to skip selecting them if
MIGRATE_VMA_SELECT_SYSTEM or MIGRATE_VMA_SELECT_DEVICE_COHERENT aren't
set.

Note that MIGRATE_VMA_SELECT_DEVICE_PRIVATE doesn't apply here because
we already know it's not a device private page by virtue of
pte_present(pte) == True.

>>  pfn = pte_pfn(pte);
>> -if (is_zero_pfn(pfn)) {
>> +if (is_zero_pfn(pfn) &&
>> +(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM)) {
>>  mpfn = MIGRATE_PFN_MIGRATE;
>>  migrate->cpages++;
>>  goto next;
>>  }
>>  page = vm_normal_page(migrate->vma, addr, pte);
>> +if (page && !is_zone_device_page(page) &&
>
> I'm wondering if that check logically belongs into patch #2.

I don't think so as it would break functionality until the below
conditionals are added - we explicitly don't want to skip
is_zone_device_page(page) == False here because those are the pages we are
trying to select.

You could add in this:

>> +!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))

But then in patch 2 we know this can never be true because we've already
checked for !MIGRATE_VMA_SELECT_SYSTEM there.

>> +goto next;
>> +else if (page && is_device_coherent_page(page) &&
>> +(!(migrate->flags & 
>> MIGRATE_VMA_SELECT_DEVICE_COHERENT) ||
>> + page->pgmap->owner != migrate->pgmap_owner))
>
>
> In general LGTM


Re: [PATCH v7 04/14] mm: add device coherent vma selection for memory migration

2022-06-30 Thread David Hildenbrand
On 29.06.22 05:54, Alex Sierra wrote:
> This case is used to migrate pages from device memory, back to system
> memory. Device coherent type memory is cache coherent from device and CPU
> point of view.
> 
> Signed-off-by: Alex Sierra 
> Acked-by: Felix Kuehling 
> Reviewed-by: Alistair Popple 
> Signed-off-by: Christoph Hellwig 


I'm not too familiar with this code, please excuse my naive questions:

> @@ -148,15 +148,21 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>   if (is_writable_device_private_entry(entry))
>   mpfn |= MIGRATE_PFN_WRITE;
>   } else {
> - if (!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
> - goto next;

Why not exclude MIGRATE_VMA_SELECT_DEVICE_PRIVATE here? IIRC that would
have happened before this change.


>   pfn = pte_pfn(pte);
> - if (is_zero_pfn(pfn)) {
> + if (is_zero_pfn(pfn) &&
> + (migrate->flags & MIGRATE_VMA_SELECT_SYSTEM)) {
>   mpfn = MIGRATE_PFN_MIGRATE;
>   migrate->cpages++;
>   goto next;
>   }
>   page = vm_normal_page(migrate->vma, addr, pte);
> + if (page && !is_zone_device_page(page) &&

I'm wondering if that check logically belongs into patch #2.

> + !(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
> + goto next;
> + else if (page && is_device_coherent_page(page) &&
> + (!(migrate->flags & 
> MIGRATE_VMA_SELECT_DEVICE_COHERENT) ||
> +  page->pgmap->owner != migrate->pgmap_owner))


In general LGTM

-- 
Thanks,

David / dhildenb



Re: [PATCH 5/5] ASoC: amd: enable machine driver build for Jadeite platform

2022-06-30 Thread Mark Brown
On Thu, Jun 30, 2022 at 08:47:55AM +0530, Vijendar Mukunda wrote:

> + depends on SND_SOC_AMD_ACP && I2C && ACPI

The code treated ACPI as optional so you could relax the ACPI dependency
to be "ACPI || COMPILE_TEST" (I think the same applies to I2C).


signature.asc
Description: PGP signature


Re: [PATCH 4/5] ASoC: amd: add Machine driver for Jadeite platform

2022-06-30 Thread Mark Brown
On Thu, Jun 30, 2022 at 08:47:54AM +0530, Vijendar Mukunda wrote:

> +static int st_es8336_hw_params(struct snd_pcm_substream *substream,
> +struct snd_pcm_hw_params *params)
> +{
> + int ret = 0;
> + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
> + struct snd_soc_dai *codec_dai = asoc_rtd_to_codec(rtd, 0);
> +
> + ret = snd_soc_dai_set_sysclk(codec_dai, 0, params_rate(params) * 256, 
> SND_SOC_CLOCK_IN);
> + if (ret < 0) {
> + dev_err(rtd->dev, "can't set codec sysclk: %d\n", ret);
> + return ret;
> + }
> + return ret;
> +}

> +static const unsigned int st_channels[] = {
> + DUAL_CHANNEL,
> +};
> +
> +static const unsigned int st_rates[] = {
> + 48000,
> +};

If the clock rate is fixed why not just set the sysclk once at startup
too?


signature.asc
Description: PGP signature


[PATCH] drm/amd/pm: update SMU 13.0.0 driver_if header

2022-06-30 Thread Evan Quan
And bump the version to 0x2A.

Signed-off-by: Evan Quan 
Change-Id: I2b66b9a289177a979201fca2056ff11e0b81f2bb
---
 .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 3 ++-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h   | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
index 6a817c7ce110..5becfc1bb2ec 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
@@ -1041,7 +1041,8 @@ typedef struct {
   uint16_tGfxclkFreqGfxUlv; // in MHz
   uint8_t GfxIdlePadding2[2];
 
-  uint32_tGfxoffSpare[16];
+  uint32_tGfxOffEntryHysteresis;
+  uint32_tGfxoffSpare[15];
 
   // GFX GPO
   uint32_tGfxGpoSpare[16];
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 7c0ca962cf28..3f12127f9c24 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -30,7 +30,7 @@
 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x29
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2A
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2A
 
 #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500  //500ms
-- 
2.29.0