RE: [PATCH v2] drm/amdgpu: Extend full access wait time in guest

2021-08-08 Thread Zhao, Victor
[AMD Official Use Only]

Reviewed-by: Victor Zhao 

-----Original Message-----
From: Peng Ju Zhou  
Sent: Monday, August 9, 2021 1:31 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhao, Victor ; Zhou, Peng Ju 
Subject: [PATCH v2] drm/amdgpu: Extend full access wait time in guest

From: Victor Zhao 

- Extend the wait time and add a retry; currently 6s * 2 times
- Change the timing algorithm (sketched below)
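
For illustration only, a minimal sketch of the deadline-plus-retry pattern the
bullets describe; it is not the patch itself. POLL_TIMEOUT_MS, check_condition(),
poll_until_deadline() and send_with_retry() are hypothetical names, not the
amdgpu ones:

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/ktime.h>
#include <linux/types.h>

#define POLL_TIMEOUT_MS 6000	/* assumed value, mirrors the 6s above */

/* hypothetical helper: returns true once the awaited mailbox message arrived */
static bool check_condition(void)
{
	return false;	/* stub for the sketch */
}

/* Poll against a wall-clock deadline instead of decrementing a counter,
 * so time spent inside the loop body cannot stretch the timeout. */
static int poll_until_deadline(void)
{
	uint64_t now = (uint64_t)ktime_to_ms(ktime_get());
	uint64_t deadline = now + POLL_TIMEOUT_MS;

	do {
		if (check_condition())
			return 0;
		msleep(10);
		now = (uint64_t)ktime_to_ms(ktime_get());
	} while (deadline > now);

	return -ETIME;
}

/* One resend on timeout gives the "6s * 2 times" total wait. */
static int send_with_retry(void)
{
	int r, retry = 1;

resend:
	r = poll_until_deadline();
	if (r && retry++ < 2)
		goto resend;
	return r;
}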

Signed-off-by: Victor Zhao 
Signed-off-by: Peng Ju Zhou 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 9f7aac435d69..b48e68f46a5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -96,7 +96,11 @@ static int xgpu_nv_poll_ack(struct amdgpu_device *adev)
 
 static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)  
{
-   int r, timeout = NV_MAILBOX_POLL_MSG_TIMEDOUT;
+   int r;
+   uint64_t timeout, now;
+
+   now = (uint64_t)ktime_to_ms(ktime_get());
+   timeout = now + NV_MAILBOX_POLL_MSG_TIMEDOUT;
 
do {
r = xgpu_nv_mailbox_rcv_msg(adev, event);
@@ -104,8 +108,8 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)
return 0;
 
msleep(10);
-   timeout -= 10;
-   } while (timeout > 1);
+   now = (uint64_t)ktime_to_ms(ktime_get());
+   } while (timeout > now);
 
 
return -ETIME;
@@ -149,9 +153,10 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,
 static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
enum idh_request req)
 {
-   int r;
+   int r, retry = 1;
enum idh_event event = -1;
 
+send_request:
xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0);
 
switch (req) {
@@ -170,6 +175,9 @@ static int xgpu_nv_send_access_requests(struct 
amdgpu_device *adev,
if (event != -1) {
r = xgpu_nv_poll_msg(adev, event);
if (r) {
+   if (retry++ < 2)
+   goto send_request;
+
if (req != IDH_REQ_GPU_INIT_DATA) {
pr_err("Doesn't get msg:%d from pf, 
error=%d\n", event, r);
return r;
--
2.17.1


[PATCH v2] drm/amdgpu: Extend full access wait time in guest

2021-08-08 Thread Peng Ju Zhou
From: Victor Zhao 

- Extend the wait time and add a retry; currently 6s * 2 times
- Change the timing algorithm

Signed-off-by: Victor Zhao 
Signed-off-by: Peng Ju Zhou 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 9f7aac435d69..b48e68f46a5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -96,7 +96,11 @@ static int xgpu_nv_poll_ack(struct amdgpu_device *adev)
 
 static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)
 {
-   int r, timeout = NV_MAILBOX_POLL_MSG_TIMEDOUT;
+   int r;
+   uint64_t timeout, now;
+
+   now = (uint64_t)ktime_to_ms(ktime_get());
+   timeout = now + NV_MAILBOX_POLL_MSG_TIMEDOUT;
 
do {
r = xgpu_nv_mailbox_rcv_msg(adev, event);
@@ -104,8 +108,8 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, 
enum idh_event event)
return 0;
 
msleep(10);
-   timeout -= 10;
-   } while (timeout > 1);
+   now = (uint64_t)ktime_to_ms(ktime_get());
+   } while (timeout > now);
 
 
return -ETIME;
@@ -149,9 +153,10 @@ static void xgpu_nv_mailbox_trans_msg (struct 
amdgpu_device *adev,
 static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
enum idh_request req)
 {
-   int r;
+   int r, retry = 1;
enum idh_event event = -1;
 
+send_request:
xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0);
 
switch (req) {
@@ -170,6 +175,9 @@ static int xgpu_nv_send_access_requests(struct 
amdgpu_device *adev,
if (event != -1) {
r = xgpu_nv_poll_msg(adev, event);
if (r) {
+   if (retry++ < 2)
+   goto send_request;
+
if (req != IDH_REQ_GPU_INIT_DATA) {
pr_err("Doesn't get msg:%d from pf, 
error=%d\n", event, r);
return r;
-- 
2.17.1



[PATCH] drm/amdgpu: Extend full access wait time in guest

2021-08-08 Thread Peng Ju Zhou
From: Victor Zhao 

- Extend the wait time and add a retry; currently 3s * 4 times
- Change the timing algorithm

Signed-off-by: Victor Zhao 
Signed-off-by: Peng Ju Zhou 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 9f7aac435d69..b48e68f46a5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -96,7 +96,11 @@ static int xgpu_nv_poll_ack(struct amdgpu_device *adev)
 
 static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)
 {
-   int r, timeout = NV_MAILBOX_POLL_MSG_TIMEDOUT;
+   int r;
+   uint64_t timeout, now;
+
+   now = (uint64_t)ktime_to_ms(ktime_get());
+   timeout = now + NV_MAILBOX_POLL_MSG_TIMEDOUT;
 
do {
r = xgpu_nv_mailbox_rcv_msg(adev, event);
@@ -104,8 +108,8 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, 
enum idh_event event)
return 0;
 
msleep(10);
-   timeout -= 10;
-   } while (timeout > 1);
+   now = (uint64_t)ktime_to_ms(ktime_get());
+   } while (timeout > now);
 
 
return -ETIME;
@@ -149,9 +153,10 @@ static void xgpu_nv_mailbox_trans_msg (struct 
amdgpu_device *adev,
 static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
enum idh_request req)
 {
-   int r;
+   int r, retry = 1;
enum idh_event event = -1;
 
+send_request:
xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0);
 
switch (req) {
@@ -170,6 +175,9 @@ static int xgpu_nv_send_access_requests(struct 
amdgpu_device *adev,
if (event != -1) {
r = xgpu_nv_poll_msg(adev, event);
if (r) {
+   if (retry++ < 2)
+   goto send_request;
+
if (req != IDH_REQ_GPU_INIT_DATA) {
pr_err("Doesn't get msg:%d from pf, 
error=%d\n", event, r);
return r;
-- 
2.17.1



[PATCH v3 1/2] drm/amd/amdgpu: embed hw_fence into amdgpu_job

2021-08-08 Thread Jingwen Chen
From: Jack Zhang 

Why: Previously the hw fence was allocated separately from the job.
This caused historical lifetime issues and corner cases.
The ideal situation is to let the fence manage both the job's and the
fence's lifetime, and to simplify the design of the gpu-scheduler.

How:
We propose to embed the hw_fence into amdgpu_job.
1. Normal job submission is covered by this method.
2. For ib_test, and for submissions without a parent job, keep the
legacy way of creating a hw fence separately.
v2:
use AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT to show that the fence is
embedded in a job.
v3:
remove the redundant ring variable in amdgpu_job
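
To make the embedding concrete, a minimal sketch follows; the struct and
function names (example_job, job_from_fence) are illustrative, not the exact
amdgpu definitions. The hw fence lives inside the job, so one allocation and
one lifetime cover both, and the job can be recovered from its fence with
container_of():

#include <linux/dma-fence.h>
#include <linux/kernel.h>

struct example_job {
	struct dma_fence hw_fence;	/* embedded: no separate fence allocation */
	/* ... other job state ... */
};

/* Only valid for fences flagged as embedded in a job. */
static struct example_job *job_from_fence(struct dma_fence *f)
{
	return container_of(f, struct example_job, hw_fence);
}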

Signed-off-by: Jingwen Chen 
Signed-off-by: Jack Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c  |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   | 62 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 35 
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h|  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 +-
 8 files changed, 82 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 7b46ba551cb2..3003ee1c9487 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -714,7 +714,6 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum 
kgd_engine_type engine,
ret = dma_fence_wait(f, false);
 
 err_ib_sched:
-   dma_fence_put(f);
amdgpu_job_free(job);
 err:
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 536005bff24a..277128846dd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1414,7 +1414,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct 
amdgpu_ring *ring)
continue;
}
job = to_amdgpu_job(s_job);
-   if (preempted && job->fence == fence)
+   if (preempted && (&job->hw_fence) == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7495911516c2..c26eec660ec1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -129,30 +129,45 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
  *
  * @ring: ring the fence is associated with
  * @f: resulting fence object
+ * @job: job the fence is embedded in
  * @flags: flags to pass into the subordinate .emit_fence() call
  *
  * Emits a fence command on the requested ring (all asics).
  * Returns 0 on success, -ENOMEM on failure.
  */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct 
amdgpu_job *job,
  unsigned flags)
 {
struct amdgpu_device *adev = ring->adev;
-   struct amdgpu_fence *fence;
+   struct dma_fence *fence;
+   struct amdgpu_fence *am_fence;
struct dma_fence __rcu **ptr;
uint32_t seq;
int r;
 
-   fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
-   if (fence == NULL)
-   return -ENOMEM;
+   if (job == NULL) {
+   /* create a separate hw fence */
+   am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC);
+   if (am_fence == NULL)
+   return -ENOMEM;
+   fence = &am_fence->base;
+   am_fence->ring = ring;
+   } else {
+   /* use the job-embedded fence */
+   fence = &job->hw_fence;
+   }
 
seq = ++ring->fence_drv.sync_seq;
-   fence->ring = ring;
-   dma_fence_init(&fence->base, &amdgpu_fence_ops,
+   dma_fence_init(fence, &amdgpu_fence_ops,
   &ring->fence_drv.lock,
   adev->fence_context + ring->idx,
   seq);
+
+   if (job != NULL) {
+   /* mark this fence has a parent job */
+   set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &fence->flags);
+   }
+
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
   seq, flags | AMDGPU_FENCE_FLAG_INT);
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
@@ -175,9 +190,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f,
/* This function can't be called concurrently anyway, otherwise
 * emitting the fence would mess up the hardware ring buffer.
 */
-   rcu_assign_pointer(*ptr, dma_fence_get(&fence->base));
+   rcu_assign_pointer(*ptr, dma_fence_get(fence));
 
-   *f = &fence->base;
+   *f = fence;
 
return 0;
 }
@@ -621,8 +636,16 @@ 

[PATCH 2/4] amdgpu/pm: Replace vega12, 20 usage of sprintf with sysfs_emit

2021-08-08 Thread Darren Powell
=== Test ===
AMDGPU_PCI_ADDR=`lspci -nn | grep "VGA\|Display" | cut -d " " -f 1`
AMDGPU_HWMON=`ls -la /sys/class/hwmon | grep $AMDGPU_PCI_ADDR | awk '{print $9}'`
HWMON_DIR=/sys/class/hwmon/${AMDGPU_HWMON}
LOGFILE=pp_printf.test.log

lspci -nn | grep "VGA\|Display"  > $LOGFILE
FILES="pp_dpm_sclk
pp_features
pp_power_profile_mode "

for f in $FILES
do
  echo === $f === >> $LOGFILE
  cat $HWMON_DIR/device/$f >> $LOGFILE
done
cat $LOGFILE
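
For context, a minimal sketch of the conversion pattern in a hypothetical
show() handler (example_show and the column headers are illustrative, not
from the patch): sysfs_emit_at() knows buf is a full sysfs page and clamps
output to PAGE_SIZE, which a raw sprintf() into buf + size does not.

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	int size = 0;

	/* before: size += sprintf(buf + size, "%-19s %-22s %s\n", ...); */
	size += sysfs_emit_at(buf, size, "%-19s %-22s %s\n",
			      "FEATURES", "BITMASK", "ENABLEMENT");
	return size;
}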

Signed-off-by: Darren Powell 
---
 .../drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c | 14 ++--
 .../drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c | 74 +--
 2 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
index 29e0d1d4035a..8558718e15a8 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
@@ -2146,13 +2146,13 @@ static int vega12_get_ppfeature_status(struct pp_hwmgr 
*hwmgr, char *buf)
"[EnableAllSmuFeatures] Failed to get enabled smc features!",
return ret);
 
-   size += sprintf(buf + size, "Current ppfeatures: 0x%016llx\n", 
features_enabled);
-   size += sprintf(buf + size, "%-19s %-22s %s\n",
+   size += sysfs_emit_at(buf, size, "Current ppfeatures: 0x%016llx\n", 
features_enabled);
+   size += sysfs_emit_at(buf, size, "%-19s %-22s %s\n",
output_title[0],
output_title[1],
output_title[2]);
for (i = 0; i < GNLD_FEATURES_MAX; i++) {
-   size += sprintf(buf + size, "%-19s 0x%016llx %6s\n",
+   size += sysfs_emit_at(buf, size, "%-19s 0x%016llx %6s\n",
ppfeature_name[i],
1ULL << i,
(features_enabled & (1ULL << i)) ? "Y" : "N");
@@ -2256,7 +2256,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr 
*hwmgr,
"Attempt to get gfx clk levels Failed!",
return -1);
for (i = 0; i < clocks.num_levels; i++)
-   size += sprintf(buf + size, "%d: %uMhz %s\n",
+   size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
i, clocks.data[i].clocks_in_khz / 1000,
(clocks.data[i].clocks_in_khz / 1000 == now / 
100) ? "*" : "");
break;
@@ -2272,7 +2272,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr 
*hwmgr,
"Attempt to get memory clk levels Failed!",
return -1);
for (i = 0; i < clocks.num_levels; i++)
-   size += sprintf(buf + size, "%d: %uMhz %s\n",
+   size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
i, clocks.data[i].clocks_in_khz / 1000,
(clocks.data[i].clocks_in_khz / 1000 == now / 
100) ? "*" : "");
break;
@@ -2290,7 +2290,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr 
*hwmgr,
"Attempt to get soc clk levels Failed!",
return -1);
for (i = 0; i < clocks.num_levels; i++)
-   size += sprintf(buf + size, "%d: %uMhz %s\n",
+   size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
i, clocks.data[i].clocks_in_khz / 1000,
(clocks.data[i].clocks_in_khz / 1000 == now) ? 
"*" : "");
break;
@@ -2308,7 +2308,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr 
*hwmgr,
"Attempt to get dcef clk levels Failed!",
return -1);
for (i = 0; i < clocks.num_levels; i++)
-   size += sprintf(buf + size, "%d: %uMhz %s\n",
+   size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
i, clocks.data[i].clocks_in_khz / 1000,
(clocks.data[i].clocks_in_khz / 1000 == now) ? 
"*" : "");
break;
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
index 0791309586c5..0f07d17e9641 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
@@ -3243,13 +3243,13 @@ static int vega20_get_ppfeature_status(struct pp_hwmgr 
*hwmgr, char *buf)
"[EnableAllSmuFeatures] Failed to get enabled smc 
features!",
return ret);
 
-   size += sprintf(buf + size, "Current ppfeatures: 0x%016llx\n", 
features_enabled);
-   size += sprintf(buf + size, "%-19s %-22s %s\n",
+