Re: [PATCH 09/18] drm/amdgpu: implement patch for fixing a known bug

2018-06-01 Thread Boyuan Zhang



On 2018-06-01 01:54 PM, Christian König wrote:

On 01.06.2018 at 18:35, boyuan.zh...@amd.com wrote:

From: Boyuan Zhang 

Implement a patch to manually reset the read pointer

v2: using ring assignment instead of amdgpu_ring_write. adding comments
for each step in the patch function.

v3: fixing a typo bug.

Signed-off-by: Boyuan Zhang 
---
  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 92 
+++

  1 file changed, 92 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c

index ea1d677..e8d24a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -40,6 +40,7 @@ static void vcn_v1_0_set_dec_ring_funcs(struct 
amdgpu_device *adev);

  static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
  static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
  static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring 
*ring, uint32_t ptr);

    /**
   * vcn_v1_0_early_init - set function pointers
@@ -1442,6 +1443,97 @@ static void vcn_v1_0_jpeg_ring_nop(struct 
amdgpu_ring *ring, uint32_t count)

  }
  }
+static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring 
*ring, uint32_t &ptr, uint32_t reg_offset, uint32_t val)


&ptr only works in C++, you need to use *ptr here :)

But apart from that the patch now looks good to me,
Christian.


Oops, sorry my fault. I've no idea why I made this obvious mistake. This 
is totally accidental... Anyways, just sent out patch#9 v4.
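C has no reference parameters, so for the callee to advance the caller's write
index it has to receive the index's address. A minimal standalone sketch of the
v3-vs-v4 difference, with simplified names rather than the driver code:

#include <stdint.h>

/* v3 shape: "uint32_t &ptr" is a C++ reference and does not compile as C. */

/* v4 shape: pass the index by pointer and dereference it. */
static void patch_wreg(uint32_t *buf, uint32_t *ptr, uint32_t val)
{
	buf[(*ptr)++] = val;	/* advances the caller's index */
}

static void caller(uint32_t *buf)
{
	uint32_t ptr = 0;

	patch_wreg(buf, &ptr, 0xdeadbeef);
	/* ptr is now 1 here as well */
}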





+{
+    struct amdgpu_device *adev = ring->adev;
+    ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);

+    if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+    ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+    ring->ring[ptr++] = 0;
+    ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, 
PACKETJ_TYPE0);

+    } else {
+    ring->ring[ptr++] = reg_offset;
+    ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0);
+    }
+    ring->ring[ptr++] = val;
+}
+
+static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring 
*ring, uint32_t ptr)

+{
+    struct amdgpu_device *adev = ring->adev;
+
+    uint32_t reg, reg_offset, val, mask, i;
+
+    // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW
+    reg = SOC15_REG_OFFSET(UVD, 0, 
mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW);

+    reg_offset = (reg << 2);
+    val = lower_32_bits(ring->gpu_addr);
+    vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
+
+    // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH
+    reg = SOC15_REG_OFFSET(UVD, 0, 
mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH);

+    reg_offset = (reg << 2);
+    val = upper_32_bits(ring->gpu_addr);
+    vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
+
+    // 3rd to 5th: issue MEM_READ commands
+    for (i = 0; i <= 2; i++) {
+    ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2);
+    ring->ring[ptr++] = 0;
+    }
+
+    // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH 
and RPTR write ability

+    reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
+    reg_offset = (reg << 2);
+    val = 0x13;
+    vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
+
+    // 7th: program mmUVD_JRBC_RB_REF_DATA
+    reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA);
+    reg_offset = (reg << 2);
+    val = 0x1;
+    vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
+
+    // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL
+    reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
+    reg_offset = (reg << 2);
+    val = 0x1;
+    mask = 0x1;
+
+    ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0);

+    ring->ring[ptr++] = 0x01400200;
+    ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0);

+    ring->ring[ptr++] = val;
+    ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);

+    if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+    ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+    ring->ring[ptr++] = 0;
+    ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, 
PACKETJ_TYPE3);

+    } else {
+    ring->ring[ptr++] = reg_offset;
+    ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3);
+    }
+    ring->ring[ptr++] = mask;
+
+    //9th to 21st: insert no-op
+    for (i = 0; i <= 12; i++) {
+    ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+    ring->ring[ptr++] = 0;
+    }
+
+    //22nd: reset mmUVD_JRBC_RB_RPTR
+    reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_RPTR);
+    reg_offset = (reg << 2);
+    val = 0;
+    vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
+
+    //23rd: program mmUVD_JRBC_RB_CNTL to disable 

[PATCH 09/18] drm/amdgpu: implement patch for fixing a known bug

2018-06-01 Thread boyuan.zhang
From: Boyuan Zhang 

Implement a patch to manually reset the read pointer

v2: using ring assignment instead of amdgpu_ring_write. adding comments
for each step in the patch function.
v3: fixing a typo bug.
v4: fixing a bug in v3.

Signed-off-by: Boyuan Zhang 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 92 +++
 1 file changed, 92 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index ea1d677..e8d24a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -40,6 +40,7 @@ static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device 
*adev);
 static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, 
uint32_t ptr);
 
 /**
  * vcn_v1_0_early_init - set function pointers
@@ -1442,6 +1443,97 @@ static void vcn_v1_0_jpeg_ring_nop(struct amdgpu_ring 
*ring, uint32_t count)
}
 }
 
+static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t 
*ptr, uint32_t reg_offset, uint32_t val)
+{
+   struct amdgpu_device *adev = ring->adev;
+   ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+   if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+   ((reg_offset >= 0x1e000) && (reg_offset <= 
0x1e1ff))) {
+   ring->ring[(*ptr)++] = 0;
+   ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, 
PACKETJ_TYPE0);
+   } else {
+   ring->ring[(*ptr)++] = reg_offset;
+   ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0);
+   }
+   ring->ring[(*ptr)++] = val;
+}
+
+static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, 
uint32_t ptr)
+{
+   struct amdgpu_device *adev = ring->adev;
+
+   uint32_t reg, reg_offset, val, mask, i;
+
+   // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW);
+   reg_offset = (reg << 2);
+   val = lower_32_bits(ring->gpu_addr);
+   vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+   // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH);
+   reg_offset = (reg << 2);
+   val = upper_32_bits(ring->gpu_addr);
+   vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+   // 3rd to 5th: issue MEM_READ commands
+   for (i = 0; i <= 2; i++) {
+   ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2);
+   ring->ring[ptr++] = 0;
+   }
+
+   // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR 
write ability
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
+   reg_offset = (reg << 2);
+   val = 0x13;
+   vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+   // 7th: program mmUVD_JRBC_RB_REF_DATA
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA);
+   reg_offset = (reg << 2);
+   val = 0x1;
+   vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+   // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
+   reg_offset = (reg << 2);
+   val = 0x1;
+   mask = 0x1;
+
+   ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0);
+   ring->ring[ptr++] = 0x01400200;
+   ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0);
+   ring->ring[ptr++] = val;
+   ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+   if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+   ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+   ring->ring[ptr++] = 0;
+   ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, 
PACKETJ_TYPE3);
+   } else {
+   ring->ring[ptr++] = reg_offset;
+   ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3);
+   }
+   ring->ring[ptr++] = mask;
+
+   //9th to 21st: insert no-op
+   for (i = 0; i <= 12; i++) {
+   ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+   ring->ring[ptr++] = 0;
+   }
+
+   //22nd: reset mmUVD_JRBC_RB_RPTR
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_RPTR);
+   reg_offset = (reg << 2);
+   val = 0;
+   vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+   //23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
+   reg_offset 

RE: [PATCH] drm/amdgpu: add kernel DOC for ioctls in amdgpu_cs.c file

2018-06-01 Thread Li, Samuel
I can see them now.

Sam

> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Samuel Li
> Sent: Friday, June 01, 2018 5:10 PM
> To: Michel Dänzer ; Liu, Leo 
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: add kernel DOC for ioctls in amdgpu_cs.c
> file
> 
> 
> 
> On 2018-05-31 12:30 PM, Michel Dänzer wrote:
> > On 2018-05-30 08:42 PM, Leo Liu wrote:
> >> There are four ioctls in this file, and the DOC comments give details
> >> of the data structures for each ioctl and their functionality.
> >>
> >> Signed-off-by: Leo Liu 
> >
> > This isn't enough to actually make this part of the generated
> > documentation. It needs to be hooked up to a *.rst file for that.
> >
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: add kernel DOC for ioctls in amdgpu_cs.c file

2018-06-01 Thread Samuel Li


On 2018-05-31 12:30 PM, Michel Dänzer wrote:
> On 2018-05-30 08:42 PM, Leo Liu wrote:
>> There are four ioctls in this file, and the DOC comments give details
>> of the data structures for each ioctl and their functionality.
>>
>> Signed-off-by: Leo Liu 
> 
> This isn't enough to actually make this part of the generated
> documentation. It needs to be hooked up to a *.rst file for that.
> 
> I'm adding an amdgpu.rst file in
> https://patchwork.freedesktop.org/series/44035/ , where you could hook
> it up accordingly.
Michel, have you submitted this one, so we can hook up the files gradually?
I do not see the patches in this list.

Sam

> 
> Please check that generating the documentation (e.g. with make htmldocs)
> doesn't produce any warnings about amdgpu_cs.c, and that the result
> looks good in Documentation/output/gpu/amdgpu.html . The documentation
> format itself is documented in Documentation/output/doc-guide/index.html .
> 
> 


[...]
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/2] drm/scheduler: Avoid using wait_event_killable for dying process.

2018-06-01 Thread Andrey Grodzovsky



On 06/01/2018 01:22 PM, Christian König wrote:

On 01.06.2018 at 19:11, Andrey Grodzovsky wrote:

Dying process might be blocked from receiving any more signals
so avoid using it.

Also retire entity->fini_status and just check the SW queue;
if it's not empty, do the fallback cleanup.

Also handle the entity->last_scheduled == NULL use case, which
happens when the HW ring is already hung when a new entity
tries to enqueue jobs.

v2:
Return the remaining timeout and use that as parameter for the next 
call.

This way when we need to cleanup multiple queues we don't wait for the
entire TO period for each queue but rather in total.
Styling comments.
Rebase.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/scheduler/gpu_scheduler.c | 74 
---

  include/drm/gpu_scheduler.h   |  7 +--
  2 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c 
b/drivers/gpu/drm/scheduler/gpu_scheduler.c

index 8c1e80c..c594d17 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -181,7 +181,6 @@ int drm_sched_entity_init(struct 
drm_gpu_scheduler *sched,

  entity->rq = rq;
  entity->sched = sched;
  entity->guilty = guilty;
-    entity->fini_status = 0;
  entity->last_scheduled = NULL;
    spin_lock_init(&entity->rq_lock);
@@ -219,7 +218,8 @@ static bool 
drm_sched_entity_is_initialized(struct drm_gpu_scheduler *sched,

  static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
  {
  rmb();
-    if (spsc_queue_peek(&entity->job_queue) == NULL)
+
+    if (!entity->rq || spsc_queue_peek(&entity->job_queue) == NULL)
  return true;
    return false;
@@ -260,25 +260,48 @@ static void 
drm_sched_entity_kill_jobs_cb(struct dma_fence *f,

   *
   * @sched: scheduler instance
   * @entity: scheduler entity
+ * @timeout: time to wait in ms for Q to become empty.
   *
   * Splitting drm_sched_entity_fini() into two functions, The first 
one does the waiting,
   * removes the entity from the runqueue and returns an error when 
the process was killed.

+ *
+ * Returns the amount of time spent waiting for the TO.
+ * 0 if the wait was not time-limited.
+ * MAX_WAIT_SCHED_ENTITY_Q_EMPTY_MS if the wait timed out with the condition false.
+ * Otherwise, the number of ms spent waiting before the condition became true.
+ *
   */
-void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
-   struct drm_sched_entity *entity)
+unsigned drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
+   struct drm_sched_entity *entity, unsigned timeout)


Better use long for return type and timeout.


  {
+    unsigned ret = 0;


Also use a long here and initialize it with timeout.


Please see below




+
  if (!drm_sched_entity_is_initialized(sched, entity))
  return;
  /**
   * The client will not queue more IBs during this fini, consume 
existing

   * queued IBs or discard them on SIGKILL
  */
-    if ((current->flags & PF_SIGNALED) && current->exit_code == 
SIGKILL)

-    entity->fini_status = -ERESTARTSYS;
-    else
-    entity->fini_status = wait_event_killable(sched->job_scheduled,
-    drm_sched_entity_is_idle(entity));
-    drm_sched_entity_set_rq(entity, NULL);
+    if (current->flags & PF_EXITING) {
+    if (timeout) {
+    ret = jiffies_to_msecs(
+    wait_event_timeout(
+    sched->job_scheduled,
+    drm_sched_entity_is_idle(entity),
+    msecs_to_jiffies(timeout)));


Oh please don't use msecs as timeout, just use jiffies and let the 
caller do the conversion.



+
+    if (!ret)
+    ret = MAX_WAIT_SCHED_ENTITY_Q_EMPTY_MS;


Why that? It is common coding style to return 0 when a timeout occurs.

Christian.


What should I return when I do wait_event_killable? Its return values 
are opposite to wait_event_timeout's...
This way returning 0 has no impact on the remaining waiting time; doing it 
the other way would force the caller

to do some cumbersome logic instead of just

max_wait = max_wait >= ret ? max_wait - ret : 0;

like in amdgpu_ctx_mgr_entity_fini
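To make the shared-budget idea concrete, the caller side looks roughly like
this (a sketch with a hypothetical release_all() helper, not the actual
driver code):

static void release_all(struct drm_gpu_scheduler *sched,
			struct drm_sched_entity **entities, int n)
{
	unsigned max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY_MS;
	unsigned ret;
	int i;

	for (i = 0; i < n; i++) {
		/* each wait consumes part of one overall budget */
		ret = drm_sched_entity_do_release(sched, entities[i], max_wait);
		max_wait = max_wait >= ret ? max_wait - ret : 0;
	}
}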

Andrey



+    }
+    } else
+    wait_event_killable(sched->job_scheduled, 
drm_sched_entity_is_idle(entity));

+
+
+    /* For killed process disable any more IBs enqueue right now */
+    if ((current->flags & PF_EXITING) && (current->exit_code == 
SIGKILL))

+    drm_sched_entity_set_rq(entity, NULL);
+
+    return ret;
  }
  EXPORT_SYMBOL(drm_sched_entity_do_release);
  @@ -290,11 +313,18 @@ EXPORT_SYMBOL(drm_sched_entity_do_release);
   *
   * This should be called after @drm_sched_entity_do_release. It 
goes over the
   * entity and signals all jobs with an error code if the process 
was killed.

+ *
   */
  void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched,
 struct drm_sched_entity *entity)
  {
-    if 

Re: [PATCH] drm/amdgpu: fix ib test hang with gfxoff enabled

2018-06-01 Thread Felix Kuehling
On 2018-06-01 06:09 AM, Christian König wrote:
> On 01.06.2018 at 11:29, Huang Rui wrote:
>> On Fri, Jun 01, 2018 at 05:13:49PM +0800, Christian König wrote:
>>> On 01.06.2018 at 08:41, Huang Rui wrote:
 After deferring the execution of the gfx/compute IB tests, by that
 time the gfx has already gone into the "mid state" of gfxoff.

 PWR_MISC_CNTL_STATUS: PWR_GFXOFF_STATUS field (2:1 bits)
 0 = GFXOFF.
 1 = Transition out of GFXOFF state.
 2 = Not in GFXOFF.
 3 = Transition into GFXOFF.

 If we hit the mid state (1 or 3), the doorbell write interrupt
 cannot wake the gfx back up successfully. The field value is 1
 when we issue the IB test at that point, so we get the hang.
 This is the root cause of the issue we encountered.

 Meanwhile, we cannot set GFX clockgating after gfx is already in
 the "off" state. So here we should move the gfx powergating and
 gfxoff enabling to the end of initialization, behind the IB test
 and clockgating.
>>> Mhm, that still looks like only a half-baked solution:
>>>
>>> 1. What prevents this bug from happening during "normal" IB submission
>>> from userspace?
>>>
>>> 2. Shouldn't we poll the PWR_MISC_CNTL_STATUS register to make sure we
>>> are not in any transition phase instead?
>>>
>> Yes, right. How about also adding polling of PWR_MISC_CNTL_STATUS in
>> amdgpu_ring_commit(), behind set_wptr, to confirm the status is "0"
>> or "2"?
>
> You could add an end_use() callback for that, but I think we rather
> need to do this in gfx_v9_0_ring_set_wptr_gfx() before we write the
> doorbell.
Isn't testing the status like this a potential race condition?

Having to do this at all is contrary to the documentation that I've
read. Writing a doorbell should wake up the GFX engine. Are we sure that
we understand the cause of the problem correctly? Does the IB test use
any MMIO? Maybe it's doing an HDP flush using MMIO for a ring that
doesn't support HDP flushing.

Regards,
  Felix
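For concreteness, the kind of poll being debated here would look roughly like
the sketch below. This is an illustration only: the RREG32_SOC15() accessor and
the exact register/instance names are assumptions, and the field layout
(PWR_GFXOFF_STATUS in bits 2:1) is taken from Huang Rui's description above.

static void gfx_v9_0_wait_gfxoff_settled(struct amdgpu_device *adev)
{
	uint32_t status;
	int i;

	for (i = 0; i < adev->usec_timeout; i++) {
		status = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
		status = (status >> 1) & 0x3;	/* PWR_GFXOFF_STATUS field */
		if (status == 0 || status == 2)
			return;	/* 0 = GFXOFF, 2 = not in GFXOFF: settled */
		udelay(1);	/* 1 or 3 = transition still in flight */
	}
}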


>
> Christian.
>
>>
>> Thanks,
>> Ray
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/doc: Add amdgpu hwmon/power documentation

2018-06-01 Thread Alex Deucher
Document the hwmon and power control interfaces exposed
by the amdgpu driver.

Signed-off-by: Alex Deucher 
---
 Documentation/gpu/amdgpu.rst   | 52 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 45 +++--
 2 files changed, 89 insertions(+), 8 deletions(-)

diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
index f557866f6788..1d726b90a619 100644
--- a/Documentation/gpu/amdgpu.rst
+++ b/Documentation/gpu/amdgpu.rst
@@ -18,3 +18,55 @@ PRIME Buffer Sharing
 --------------------
 .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
    :internal:
+
+GPU Power/Thermal Controls and Monitoring
+=========================================
+
+This chapter covers hwmon and power/thermal controls.
+
+HWMON Interfaces
+----------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+   :doc: hwmon
+
+GPU sysfs Power State Interfaces
+--------------------------------
+
+GPU power controls are exposed via sysfs files.
+
+power_dpm_state
+~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+   :doc: power_dpm_state
+
+power_dpm_force_performance_level
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+   :doc: power_dpm_force_performance_level
+
+pp_table
+~~~~~~~~
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+   :doc: pp_table
+
+pp_od_clk_voltage
+~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+   :doc: pp_od_clk_voltage
+
+pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+   :doc: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie
+
+pp_power_profile_mode
+~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+   :doc: pp_power_profile_mode
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index b455da487782..f667cb9eb614 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -80,12 +80,15 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device 
*adev)
 /**
  * DOC: power_dpm_state
  *
- * This is a legacy interface and is only provided for backwards compatibility.
- * The amdgpu driver provides a sysfs API for adjusting certain power
- * related parameters.  The file power_dpm_state is used for this.
+ * The power_dpm_state file is a legacy interface and is only provided for
+ * backwards compatibility. The amdgpu driver provides a sysfs API for 
adjusting
+ * certain power related parameters.  The file power_dpm_state is used for 
this.
  * It accepts the following arguments:
+ *
  * - battery
+ *
  * - balanced
+ *
  * - performance
  *
  * battery
@@ -169,14 +172,21 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
  * The amdgpu driver provides a sysfs API for adjusting certain power
  * related parameters.  The file power_dpm_force_performance_level is
  * used for this.  It accepts the following arguments:
+ *
  * - auto
+ *
  * - low
+ *
  * - high
+ *
  * - manual
- * - GPU fan
+ *
  * - profile_standard
+ *
  * - profile_min_sclk
+ *
  * - profile_min_mclk
+ *
  * - profile_peak
  *
  * auto
@@ -463,8 +473,11 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
  * this.
  *
  * Reading the file will display:
+ *
  * - a list of engine clock levels and voltages labeled OD_SCLK
+ *
  * - a list of memory clock levels and voltages labeled OD_MCLK
+ *
  * - a list of valid ranges for sclk, mclk, and voltage labeled OD_RANGE
  *
  * To manually adjust these settings, first select manual using
@@ -1285,35 +1298,51 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device 
*dev,
  * DOC: hwmon
  *
  * The amdgpu driver exposes the following sensor interfaces:
+ *
  * - GPU temperature (via the on-die sensor)
+ *
  * - GPU voltage
+ *
  * - Northbridge voltage (APUs only)
+ *
  * - GPU power
+ *
  * - GPU fan
  *
  * hwmon interfaces for GPU temperature:
+ *
  * - temp1_input: the on die GPU temperature in millidegrees Celsius
+ *
  * - temp1_crit: temperature critical max value in millidegrees Celsius
+ *
  * - temp1_crit_hyst: temperature hysteresis for critical limit in 
millidegrees Celsius
  *
  * hwmon interfaces for GPU voltage:
+ *
  * - in0_input: the voltage on the GPU in millivolts
+ *
  * - in1_input: the voltage on the Northbridge in millivolts
  *
  * hwmon interfaces for GPU power:
+ *
  * - power1_average: average power used by the GPU in microWatts
+ *
  * - power1_cap_min: minimum cap supported in microWatts
+ *
  * - power1_cap_max: maximum cap supported in microWatts
+ *
  * - power1_cap: selected power cap in microWatts
  *
  * hwmon interfaces for GPU fan:
+ *
  * - pwm1: pulse width modulation fan level (0-255)
- * - pwm1_enable: pulse width modulation fan control method
 *    0: no fan speed control
 *    1: manual fan speed control using pwm interface
 *    2: automatic fan 

[PATCH 2/2] drm/doc: Make naming consistent for Core Driver Infrastructure

2018-06-01 Thread Alex Deucher
Use chapter rather than section to align with the rst markup.

Signed-off-by: Alex Deucher 
---
 Documentation/gpu/amdgpu.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
index 1d726b90a619..e99732553c71 100644
--- a/Documentation/gpu/amdgpu.rst
+++ b/Documentation/gpu/amdgpu.rst
@@ -8,7 +8,7 @@ Next (GCN) architecture.
 Core Driver Infrastructure
 ==========================
 
-This section covers core driver infrastructure.
+This chapter covers core driver infrastructure.
 
 PRIME Buffer Sharing
 --------------------
-- 
2.13.6

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 09/18] drm/amdgpu: implement patch for fixing a known bug

2018-06-01 Thread Christian König

On 01.06.2018 at 18:35, boyuan.zh...@amd.com wrote:

From: Boyuan Zhang 

Implement a patch to manually reset the read pointer

v2: using ring assignment instead of amdgpu_ring_write. adding comments
for each step in the patch function.

v3: fixing a typo bug.

Signed-off-by: Boyuan Zhang 
---
  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 92 +++
  1 file changed, 92 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index ea1d677..e8d24a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -40,6 +40,7 @@ static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device 
*adev);
  static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
  static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
  static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, 
uint32_t ptr);
  
  /**

   * vcn_v1_0_early_init - set function pointers
@@ -1442,6 +1443,97 @@ static void vcn_v1_0_jpeg_ring_nop(struct amdgpu_ring 
*ring, uint32_t count)
}
  }
  
+static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t &ptr, uint32_t reg_offset, uint32_t val)


&ptr only works in C++, you need to use *ptr here :)

But apart from that the patch now looks good to me,
Christian.


+{
+   struct amdgpu_device *adev = ring->adev;
+   ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+   if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+   ((reg_offset >= 0x1e000) && (reg_offset <= 
0x1e1ff))) {
+   ring->ring[ptr++] = 0;
+   ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, 
PACKETJ_TYPE0);
+   } else {
+   ring->ring[ptr++] = reg_offset;
+   ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0);
+   }
+   ring->ring[ptr++] = val;
+}
+
+static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, 
uint32_t ptr)
+{
+   struct amdgpu_device *adev = ring->adev;
+
+   uint32_t reg, reg_offset, val, mask, i;
+
+   // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW);
+   reg_offset = (reg << 2);
+   val = lower_32_bits(ring->gpu_addr);
+   vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
+
+   // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH);
+   reg_offset = (reg << 2);
+   val = upper_32_bits(ring->gpu_addr);
+   vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
+
+   // 3rd to 5th: issue MEM_READ commands
+   for (i = 0; i <= 2; i++) {
+   ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2);
+   ring->ring[ptr++] = 0;
+   }
+
+   // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR 
write ability
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
+   reg_offset = (reg << 2);
+   val = 0x13;
+   vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
+
+   // 7th: program mmUVD_JRBC_RB_REF_DATA
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA);
+   reg_offset = (reg << 2);
+   val = 0x1;
+   vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
+
+   // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
+   reg_offset = (reg << 2);
+   val = 0x1;
+   mask = 0x1;
+
+   ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0);
+   ring->ring[ptr++] = 0x01400200;
+   ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0);
+   ring->ring[ptr++] = val;
+   ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+   if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+   ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+   ring->ring[ptr++] = 0;
+   ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, 
PACKETJ_TYPE3);
+   } else {
+   ring->ring[ptr++] = reg_offset;
+   ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3);
+   }
+   ring->ring[ptr++] = mask;
+
+   //9th to 21st: insert no-op
+   for (i = 0; i <= 12; i++) {
+   ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+   ring->ring[ptr++] = 0;
+   }
+
+   //22nd: reset mmUVD_JRBC_RB_RPTR
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_RPTR);
+   reg_offset = (reg << 2);
+   val = 0;
+   vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, 

Re: [PATCH v2 1/2] drm/scheduler: Avoid using wait_event_killable for dying process.

2018-06-01 Thread Christian König

On 01.06.2018 at 19:11, Andrey Grodzovsky wrote:

Dying process might be blocked from receiving any more signals
so avoid using it.

Also retire entity->fini_status and just check the SW queue;
if it's not empty, do the fallback cleanup.

Also handle the entity->last_scheduled == NULL use case, which
happens when the HW ring is already hung when a new entity
tries to enqueue jobs.

v2:
Return the remaining timeout and use that as parameter for the next call.
This way when we need to cleanup multiple queues we don't wait for the
entire TO period for each queue but rather in total.
Styling comments.
Rebase.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/scheduler/gpu_scheduler.c | 74 ---
  include/drm/gpu_scheduler.h   |  7 +--
  2 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c 
b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index 8c1e80c..c594d17 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -181,7 +181,6 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
entity->rq = rq;
entity->sched = sched;
entity->guilty = guilty;
-   entity->fini_status = 0;
entity->last_scheduled = NULL;
  
  	spin_lock_init(&entity->rq_lock);

@@ -219,7 +218,8 @@ static bool drm_sched_entity_is_initialized(struct 
drm_gpu_scheduler *sched,
  static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
  {
rmb();
-   if (spsc_queue_peek(&entity->job_queue) == NULL)
+
+   if (!entity->rq || spsc_queue_peek(&entity->job_queue) == NULL)
return true;
  
  	return false;

@@ -260,25 +260,48 @@ static void drm_sched_entity_kill_jobs_cb(struct 
dma_fence *f,
   *
   * @sched: scheduler instance
   * @entity: scheduler entity
+ * @timeout: time to wait in ms for Q to become empty.
   *
   * Splitting drm_sched_entity_fini() into two functions, The first one does 
the waiting,
   * removes the entity from the runqueue and returns an error when the process 
was killed.
+ *
+ * Returns the amount of time spent waiting for the TO.
+ * 0 if the wait was not time-limited.
+ * MAX_WAIT_SCHED_ENTITY_Q_EMPTY_MS if the wait timed out with the condition false.
+ * Otherwise, the number of ms spent waiting before the condition became true.
+ *
   */
-void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
-  struct drm_sched_entity *entity)
+unsigned drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
+  struct drm_sched_entity *entity, unsigned timeout)


Better use long for return type and timeout.


  {
+   unsigned ret = 0;


Also use a long here and initialize it with timeout.


+
if (!drm_sched_entity_is_initialized(sched, entity))
return;
/**
 * The client will not queue more IBs during this fini, consume existing
 * queued IBs or discard them on SIGKILL
*/
-   if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL)
-   entity->fini_status = -ERESTARTSYS;
-   else
-   entity->fini_status = wait_event_killable(sched->job_scheduled,
-   drm_sched_entity_is_idle(entity));
-   drm_sched_entity_set_rq(entity, NULL);
+   if (current->flags & PF_EXITING) {
+   if (timeout) {
+   ret = jiffies_to_msecs(
+   wait_event_timeout(
+   sched->job_scheduled,
+   
drm_sched_entity_is_idle(entity),
+   
msecs_to_jiffies(timeout)));


Oh please don't use msecs as timeout, just use jiffies and let the 
caller do the conversion.
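The jiffies-based shape being suggested would look roughly like this (a sketch,
not the final code): wait_event_timeout() already takes and returns jiffies, so
the remaining budget can flow straight through without any unit conversion.

long drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
				 struct drm_sched_entity *entity, long timeout)
{
	long ret = timeout;

	if (current->flags & PF_EXITING) {
		if (timeout)
			/* remaining jiffies, or 0 if it timed out */
			ret = wait_event_timeout(sched->job_scheduled,
					drm_sched_entity_is_idle(entity),
					timeout);
	} else {
		wait_event_killable(sched->job_scheduled,
				    drm_sched_entity_is_idle(entity));
	}
	return ret;
}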



+
+   if (!ret)
+   ret = MAX_WAIT_SCHED_ENTITY_Q_EMPTY_MS;


Why that? It is common coding style to return 0 when a timeout occurs.

Christian.


+   }
+   } else
+   wait_event_killable(sched->job_scheduled, 
drm_sched_entity_is_idle(entity));
+
+
+   /* For killed process disable any more IBs enqueue right now */
+   if ((current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
+   drm_sched_entity_set_rq(entity, NULL);
+
+   return ret;
  }
  EXPORT_SYMBOL(drm_sched_entity_do_release);
  
@@ -290,11 +313,18 @@ EXPORT_SYMBOL(drm_sched_entity_do_release);

   *
   * This should be called after @drm_sched_entity_do_release. It goes over the
   * entity and signals all jobs with an error code if the process was killed.
+ *
   */
  void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched,
   struct drm_sched_entity *entity)
  {
-   if (entity->fini_status) {
+
+   drm_sched_entity_set_rq(entity, NULL);
+
+   /* Consumption of existing IBs wasn't completed. Forcefully
+

[PATCH v2 2/2] drm/amdgpu: move amdgpu_ctx_mgr_entity_fini to f_ops flush hook.

2018-06-01 Thread Andrey Grodzovsky
With this we can now terminate job enqueueing into the SW queue the moment
the task is being killed, instead of waiting for the last user of
the drm file to release it.

Also stop checking for kref_read(&ctx->refcount) == 1 when
calling drm_sched_entity_do_release, since another task
might still hold a reference to this entity; but we don't
care, since KILL means terminate job submission regardless
of what other tasks are doing.

v2:
Use returned remaining timeout as parameter for the next call.
Rebase.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 17 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 12 
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  1 -
 3 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index c5bb362..cbf2fcc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -449,26 +449,31 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr 
*mgr)
struct amdgpu_ctx *ctx;
struct idr *idp;
uint32_t id, i;
+   unsigned ret;
+   unsigned max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY_MS;
 
	idp = &mgr->ctx_handles;
 
+   mutex_lock(&mgr->lock);
idr_for_each_entry(idp, ctx, id) {
 
-   if (!ctx->adev)
+   if (!ctx->adev) {
+   mutex_unlock(&mgr->lock);
return;
+   }
 
for (i = 0; i < ctx->adev->num_rings; i++) {
 
if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
continue;
 
-   if (kref_read(&ctx->refcount) == 1)
-   
drm_sched_entity_do_release(&ctx->adev->rings[i]->sched,
- &ctx->rings[i].entity);
-   else
-   DRM_ERROR("ctx %p is still alive\n", ctx);
+   ret = 
drm_sched_entity_do_release(&ctx->adev->rings[i]->sched,
+ &ctx->rings[i].entity, max_wait);
+
+   max_wait = max_wait >= ret ? max_wait - ret : 0;
}
}
+   mutex_unlock(&mgr->lock);
 }
 
 void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b0bf2f2..a549483 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -855,9 +855,21 @@ static const struct dev_pm_ops amdgpu_pm_ops = {
.runtime_idle = amdgpu_pmops_runtime_idle,
 };
 
+static int amdgpu_flush(struct file *f, fl_owner_t id)
+{
+   struct drm_file *file_priv = f->private_data;
+   struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+
+   amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr);
+
+   return 0;
+}
+
+
 static const struct file_operations amdgpu_driver_kms_fops = {
.owner = THIS_MODULE,
.open = drm_open,
+   .flush = amdgpu_flush,
.release = drm_release,
.unlocked_ioctl = amdgpu_drm_ioctl,
.mmap = amdgpu_mmap,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index ca21549..1239384 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -930,7 +930,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
return;
 
pm_runtime_get_sync(dev->dev);
-   amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr);
 
if (adev->asic_type != CHIP_RAVEN) {
amdgpu_uvd_free_handles(adev, file_priv);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 1/2] drm/scheduler: Avoid using wait_event_killable for dying process.

2018-06-01 Thread Andrey Grodzovsky
Dying process might be blocked from receiving any more signals
so avoid using it.

Also retire entity->fini_status and just check the SW queue;
if it's not empty, do the fallback cleanup.

Also handle the entity->last_scheduled == NULL use case, which
happens when the HW ring is already hung when a new entity
tries to enqueue jobs.

v2:
Return the remaining timeout and use that as parameter for the next call.
This way when we need to cleanup multiple queues we don't wait for the
entire TO period for each queue but rather in total.
Styling comments.
Rebase.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/scheduler/gpu_scheduler.c | 74 ---
 include/drm/gpu_scheduler.h   |  7 +--
 2 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c 
b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index 8c1e80c..c594d17 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -181,7 +181,6 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
entity->rq = rq;
entity->sched = sched;
entity->guilty = guilty;
-   entity->fini_status = 0;
entity->last_scheduled = NULL;
 
	spin_lock_init(&entity->rq_lock);
@@ -219,7 +218,8 @@ static bool drm_sched_entity_is_initialized(struct 
drm_gpu_scheduler *sched,
 static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
 {
rmb();
-   if (spsc_queue_peek(&entity->job_queue) == NULL)
+
+   if (!entity->rq || spsc_queue_peek(&entity->job_queue) == NULL)
return true;
 
return false;
@@ -260,25 +260,48 @@ static void drm_sched_entity_kill_jobs_cb(struct 
dma_fence *f,
  *
  * @sched: scheduler instance
  * @entity: scheduler entity
+ * @timeout: time to wait in ms for Q to become empty.
  *
  * Splitting drm_sched_entity_fini() into two functions, The first one does 
the waiting,
  * removes the entity from the runqueue and returns an error when the process 
was killed.
+ *
+ * Returns the amount of time spent waiting for the TO.
+ * 0 if the wait was not time-limited.
+ * MAX_WAIT_SCHED_ENTITY_Q_EMPTY_MS if the wait timed out with the condition false.
+ * Otherwise, the number of ms spent waiting before the condition became true.
+ *
  */
-void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
-  struct drm_sched_entity *entity)
+unsigned drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
+  struct drm_sched_entity *entity, unsigned timeout)
 {
+   unsigned ret = 0;
+
if (!drm_sched_entity_is_initialized(sched, entity))
return;
/**
 * The client will not queue more IBs during this fini, consume existing
 * queued IBs or discard them on SIGKILL
*/
-   if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL)
-   entity->fini_status = -ERESTARTSYS;
-   else
-   entity->fini_status = wait_event_killable(sched->job_scheduled,
-   drm_sched_entity_is_idle(entity));
-   drm_sched_entity_set_rq(entity, NULL);
+   if (current->flags & PF_EXITING) {
+   if (timeout) {
+   ret = jiffies_to_msecs(
+   wait_event_timeout(
+   sched->job_scheduled,
+   
drm_sched_entity_is_idle(entity),
+   
msecs_to_jiffies(timeout)));
+
+   if (!ret)
+   ret = MAX_WAIT_SCHED_ENTITY_Q_EMPTY_MS;
+   }
+   } else
+   wait_event_killable(sched->job_scheduled, 
drm_sched_entity_is_idle(entity));
+
+
+   /* For killed process disable any more IBs enqueue right now */
+   if ((current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
+   drm_sched_entity_set_rq(entity, NULL);
+
+   return ret;
 }
 EXPORT_SYMBOL(drm_sched_entity_do_release);
 
@@ -290,11 +313,18 @@ EXPORT_SYMBOL(drm_sched_entity_do_release);
  *
  * This should be called after @drm_sched_entity_do_release. It goes over the
  * entity and signals all jobs with an error code if the process was killed.
+ *
  */
 void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched,
   struct drm_sched_entity *entity)
 {
-   if (entity->fini_status) {
+
+   drm_sched_entity_set_rq(entity, NULL);
+
+   /* Consumption of existing IBs wasn't completed. Forcefully
+* remove them here.
+*/
+   if (spsc_queue_peek(&entity->job_queue)) {
struct drm_sched_job *job;
int r;
 
@@ -314,12 +344,22 @@ void drm_sched_entity_cleanup(struct drm_gpu_scheduler 
*sched,
struct drm_sched_fence *s_fence = job->s_fence;
drm_sched_fence_scheduled(s_fence);

Re: [PATCH 09/18] drm/amdgpu: implement patch for fixing a known bug

2018-06-01 Thread Alex Deucher
On Fri, Jun 1, 2018 at 12:35 PM,   wrote:
> From: Boyuan Zhang 
>
> Implement a patch to manually reset the read pointer
>
> v2: using ring assignment instead of amdgpu_ring_write. adding comments
> for each step in the patch function.
>
> v3: fixing a typo bug.
>
> Signed-off-by: Boyuan Zhang 
> ---
>  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 92 
> +++
>  1 file changed, 92 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c 
> b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> index ea1d677..e8d24a6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> @@ -40,6 +40,7 @@ static void vcn_v1_0_set_dec_ring_funcs(struct 
> amdgpu_device *adev);
>  static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
>  static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
>  static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
> +static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, 
> uint32_t ptr);
>
>  /**
>   * vcn_v1_0_early_init - set function pointers
> @@ -1442,6 +1443,97 @@ static void vcn_v1_0_jpeg_ring_nop(struct amdgpu_ring 
> *ring, uint32_t count)
> }
>  }
>
> +static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t 
> &ptr, uint32_t reg_offset, uint32_t val)
> +{
> +   struct amdgpu_device *adev = ring->adev;
> +   ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
> mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
> +   if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
> +   ((reg_offset >= 0x1e000) && (reg_offset <= 
> 0x1e1ff))) {
> +   ring->ring[ptr++] = 0;
> +   ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, 
> PACKETJ_TYPE0);
> +   } else {
> +   ring->ring[ptr++] = reg_offset;
> +   ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0);
> +   }
> +   ring->ring[ptr++] = val;
> +}
> +
> +static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, 
> uint32_t ptr)
> +{
> +   struct amdgpu_device *adev = ring->adev;
> +
> +   uint32_t reg, reg_offset, val, mask, i;
> +
> +   // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW

Please switch to C style /* */ comments before committing.

Alex
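For reference, the requested change is purely syntactic; taking the first
comment in the function as an example:

	// 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW    <- C99/C++ style
	/* 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW */ <- kernel C style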

> +   reg = SOC15_REG_OFFSET(UVD, 0, 
> mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW);
> +   reg_offset = (reg << 2);
> +   val = lower_32_bits(ring->gpu_addr);
> +   vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
> +
> +   // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH
> +   reg = SOC15_REG_OFFSET(UVD, 0, 
> mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH);
> +   reg_offset = (reg << 2);
> +   val = upper_32_bits(ring->gpu_addr);
> +   vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
> +
> +   // 3rd to 5th: issue MEM_READ commands
> +   for (i = 0; i <= 2; i++) {
> +   ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2);
> +   ring->ring[ptr++] = 0;
> +   }
> +
> +   // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and 
> RPTR write ability
> +   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
> +   reg_offset = (reg << 2);
> +   val = 0x13;
> +   vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
> +
> +   // 7th: program mmUVD_JRBC_RB_REF_DATA
> +   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA);
> +   reg_offset = (reg << 2);
> +   val = 0x1;
> +   vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
> +
> +   // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL
> +   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
> +   reg_offset = (reg << 2);
> +   val = 0x1;
> +   mask = 0x1;
> +
> +   ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
> mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0);
> +   ring->ring[ptr++] = 0x01400200;
> +   ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
> mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0);
> +   ring->ring[ptr++] = val;
> +   ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
> mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
> +   if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
> +   ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) 
> {
> +   ring->ring[ptr++] = 0;
> +   ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, 
> PACKETJ_TYPE3);
> +   } else {
> +   ring->ring[ptr++] = reg_offset;
> +   ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3);
> +   }
> +   ring->ring[ptr++] = mask;
> +
> +   //9th to 21st: insert no-op
> +   for (i = 0; i <= 12; i++) {
> +   ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
> +   ring->ring[ptr++] = 0;
> +   }
> +
> +   //22nd: reset 

Re: [PATCH 01/18] drm/amdgpu: define vcn jpeg ring

2018-06-01 Thread Boyuan Zhang



On 2018-06-01 12:14 PM, Boyuan Zhang wrote:



On 2018-06-01 12:02 PM, Boyuan Zhang wrote:



On 2018-06-01 04:36 AM, Christian König wrote:
Patches #1 - #8 and patches #11-#18 are Reviewed-by: Christian König.


Patch #9:

static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, 
uint32_t ptr, uint32_t reg_offset, uint32_t val)

That you don't pass ptr by reference here looks like a bug to me.


Thanks a lot for catching this typo bug. Just sent out Patch#9 v3 and 
Patch#10 v2.


Regards,
Boyuan







Patch #10:

+    .extra_dw = 0,
I think we should either drop that or add it to all the other rings 
as well. I certainly prefer to just drop it, cause it's less 
maintenance work.


Thanks for the review. I will drop it accordingly.

Regards,
Boyuan
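Dropping it is safe because C zero-initializes any member omitted from a
designated initializer, so ring-funcs tables that never mention .extra_dw get
0 automatically. A minimal illustration (simplified struct, not the driver
code):

struct funcs {
	unsigned type;
	unsigned extra_dw;
};

static const struct funcs dec_ring_funcs = {
	.type = 1,
	/* .extra_dw omitted -> implicitly 0 */
};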



Apart from that the patch is Reviewed-by: Christian König 
 as well.


Thanks,
Christian.

On 30.05.2018 at 22:27, boyuan.zh...@amd.com wrote:

From: Boyuan Zhang 

Add AMDGPU_RING_TYPE_VCN_JPEG ring define

Signed-off-by: Boyuan Zhang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

index 1513124c..a3908ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -53,7 +53,8 @@ enum amdgpu_ring_type {
  AMDGPU_RING_TYPE_KIQ,
  AMDGPU_RING_TYPE_UVD_ENC,
  AMDGPU_RING_TYPE_VCN_DEC,
-    AMDGPU_RING_TYPE_VCN_ENC
+    AMDGPU_RING_TYPE_VCN_ENC,
+    AMDGPU_RING_TYPE_VCN_JPEG
  };
    struct amdgpu_device;








___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 10/18] drm/amdgpu: define and add extra dword for jpeg ring

2018-06-01 Thread boyuan.zhang
From: Boyuan Zhang 

Define an extra dword count for the jpeg ring. The jpeg ring will allocate
extra dwords to store the patch commands for fixing the known issue.

v2: dropping extra_dw for rings other than jpeg.

Signed-off-by: Boyuan Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c| 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index c6850b6..19e45a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -304,7 +304,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct 
amdgpu_ring *ring,
0x : ring->buf_mask;
/* Allocate ring buffer */
if (ring->ring_obj == NULL) {
-   r = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
+   r = amdgpu_bo_create_kernel(adev, ring->ring_size + 
ring->funcs->extra_dw, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&ring->ring_obj,
&ring->gpu_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index a3908ef..a293f4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -113,6 +113,7 @@ struct amdgpu_ring_funcs {
u32 nop;
boolsupport_64bit_ptrs;
unsignedvmhub;
+   unsignedextra_dw;
 
/* ring read/write ptr handling */
u64 (*get_rptr)(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index e8d24a6..8d9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -1690,6 +1690,7 @@ static const struct amdgpu_ring_funcs 
vcn_v1_0_jpeg_ring_vm_funcs = {
.nop = PACKET0(0x81ff, 0),
.support_64bit_ptrs = false,
.vmhub = AMDGPU_MMHUB,
+   .extra_dw = 64,
.get_rptr = vcn_v1_0_jpeg_ring_get_rptr,
.get_wptr = vcn_v1_0_jpeg_ring_get_wptr,
.set_wptr = vcn_v1_0_jpeg_ring_set_wptr,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 09/18] drm/amdgpu: implement patch for fixing a known bug

2018-06-01 Thread boyuan.zhang
From: Boyuan Zhang 

Implement a patch to manually reset the read pointer

v2: using ring assignment instead of amdgpu_ring_write. adding comments
for each step in the patch function.

v3: fixing a typo bug.

Signed-off-by: Boyuan Zhang 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 92 +++
 1 file changed, 92 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index ea1d677..e8d24a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -40,6 +40,7 @@ static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device 
*adev);
 static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, 
uint32_t ptr);
 
 /**
  * vcn_v1_0_early_init - set function pointers
@@ -1442,6 +1443,97 @@ static void vcn_v1_0_jpeg_ring_nop(struct amdgpu_ring 
*ring, uint32_t count)
}
 }
 
+static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t 
&ptr, uint32_t reg_offset, uint32_t val)
+{
+   struct amdgpu_device *adev = ring->adev;
+   ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+   if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+   ((reg_offset >= 0x1e000) && (reg_offset <= 
0x1e1ff))) {
+   ring->ring[ptr++] = 0;
+   ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, 
PACKETJ_TYPE0);
+   } else {
+   ring->ring[ptr++] = reg_offset;
+   ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0);
+   }
+   ring->ring[ptr++] = val;
+}
+
+static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, 
uint32_t ptr)
+{
+   struct amdgpu_device *adev = ring->adev;
+
+   uint32_t reg, reg_offset, val, mask, i;
+
+   // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW);
+   reg_offset = (reg << 2);
+   val = lower_32_bits(ring->gpu_addr);
+   vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
+
+   // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH);
+   reg_offset = (reg << 2);
+   val = upper_32_bits(ring->gpu_addr);
+   vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
+
+   // 3rd to 5th: issue MEM_READ commands
+   for (i = 0; i <= 2; i++) {
+   ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2);
+   ring->ring[ptr++] = 0;
+   }
+
+   // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR 
write ability
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
+   reg_offset = (reg << 2);
+   val = 0x13;
+   vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
+
+   // 7th: program mmUVD_JRBC_RB_REF_DATA
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA);
+   reg_offset = (reg << 2);
+   val = 0x1;
+   vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
+
+   // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
+   reg_offset = (reg << 2);
+   val = 0x1;
+   mask = 0x1;
+
+   ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0);
+   ring->ring[ptr++] = 0x01400200;
+   ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0);
+   ring->ring[ptr++] = val;
+   ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+   if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+   ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+   ring->ring[ptr++] = 0;
+   ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, 
PACKETJ_TYPE3);
+   } else {
+   ring->ring[ptr++] = reg_offset;
+   ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3);
+   }
+   ring->ring[ptr++] = mask;
+
+   //9th to 21st: insert no-op
+   for (i = 0; i <= 12; i++) {
+   ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+   ring->ring[ptr++] = 0;
+   }
+
+   //22nd: reset mmUVD_JRBC_RB_RPTR
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_RPTR);
+   reg_offset = (reg << 2);
+   val = 0;
+   vcn_v1_0_jpeg_ring_patch_wreg(ring, ptr, reg_offset, val);
+
+   //23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch
+   reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
+   reg_offset = (reg << 2);
+   val = 

Re: [PATCH 01/18] drm/amdgpu: define vcn jpeg ring

2018-06-01 Thread Boyuan Zhang



On 2018-06-01 12:02 PM, Boyuan Zhang wrote:



On 2018-06-01 04:36 AM, Christian König wrote:
Patches #1 - #8 and patches #11-#18 are Reviewed-by: Christian König.


Patch #9:

static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, 
uint32_t ptr, uint32_t reg_offset, uint32_t val)

That you don't pass ptr by reference here looks like a bug to me.


Sorry for the confusion; I shouldn't have called it "ptr" because it is 
actually an index into ring->ring[]. I will rename it to something like 
"idx".


Oops, I misunderstood your comments. Will fix accordingly.

Thanks,
Boyuan





Patch #10:

+    .extra_dw = 0,
I think we should either drop that or add it to all the other rings 
as well. I certainly prefer to just drop it, cause it's less 
maintenance work.


Thanks for the review. I will drop it accordingly.

Regards,
Boyuan



Apart from that the patch is Reviewed-by: Christian König 
 as well.


Thanks,
Christian.

On 30.05.2018 at 22:27, boyuan.zh...@amd.com wrote:

From: Boyuan Zhang 

Add AMDGPU_RING_TYPE_VCN_JPEG ring define

Signed-off-by: Boyuan Zhang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

index 1513124c..a3908ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -53,7 +53,8 @@ enum amdgpu_ring_type {
  AMDGPU_RING_TYPE_KIQ,
  AMDGPU_RING_TYPE_UVD_ENC,
  AMDGPU_RING_TYPE_VCN_DEC,
-    AMDGPU_RING_TYPE_VCN_ENC
+    AMDGPU_RING_TYPE_VCN_ENC,
+    AMDGPU_RING_TYPE_VCN_JPEG
  };
    struct amdgpu_device;






___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu 3/7] Configure color properties when creating output resources

2018-06-01 Thread sunpeng.li
From: "Leo (Sunpeng) Li" 

List color management properties on outputs if there's at least one
CRTC that supports color management. Otherwise, don't list them at
all.

If there's no CRTC attached to the output, and there exists a CRTC
that supports color management, then list "disabled" properties
(immutable and NULL-valued).

Signed-off-by: Leo (Sunpeng) Li 
---
 src/drmmode_display.c | 148 ++
 1 file changed, 148 insertions(+)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index de09361..32ac441 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -769,6 +769,15 @@ static enum drmmode_cm_prop get_cm_enum_from_str(const char *prop_name)
 }
 
 /**
+ * Return TRUE if there's at least one CRTC that supports non-legacy color
+ * management. False otherwise.
+ */
+static Bool drmmode_cm_enabled(drmmode_ptr drmmode)
+{
+   return drmmode->cm_prop_ids[CM_GAMMA_LUT_SIZE] != 0;
+}
+
+/**
  * Return TRUE if the given CRTC supports non-legacy color management. False
  * otherwise.
  */
@@ -779,6 +788,123 @@ static Bool drmmode_crtc_cm_enabled(drmmode_crtc_private_ptr drmmode_crtc)
 }
 
 /**
+ * Configure and change a color property on a CRTC, through RandR. Only the
+ * specified output will be affected, even if the CRTC is attached to multiple
+ * outputs. Note that changes will be non-pending: the changes won't be pushed
+ * to kernel driver.
+ *
+ * @output: RandR output to set the property on.
+ * @crtc: The driver-private CRTC object containing the color properties.
+ *If this is NULL, "disabled" values of 0 will be used.
+ * @cm_prop_index: Color management property to configure and change.
+ *
+ * Return 0 on success, X-defined error code otherwise.
+ */
+static int rr_configure_and_change_cm_property(xf86OutputPtr output,
+  drmmode_crtc_private_ptr crtc,
+  enum drmmode_cm_prop cm_prop_index)
+{
+   Bool need_configure = TRUE;
+   unsigned long length = 0;
+   void *data = NULL;
+   int format = 0;
+   uint32_t zero = 0;
+   INT32 range[2];
+   Atom atom;
+   int err;
+
+   if (cm_prop_index == CM_INVALID_PROP)
+   return BadName;
+
+   atom = MakeAtom(cm_prop_names[cm_prop_index],
+   strlen(cm_prop_names[cm_prop_index]),
+   TRUE);
+   if (!atom)
+   return BadAlloc;
+
+   if (cm_prop_index == CM_GAMMA_LUT_SIZE) {
+   format = 32;
+   length = 1;
+   data = crtc ? &crtc->gamma_lut_size : &zero;
+   range[0] = 0;
+   range[1] = -1;
+
+   } else if (cm_prop_index == CM_DEGAMMA_LUT_SIZE) {
+   format = 32;
+   length = 1;
+   data = crtc ? &crtc->degamma_lut_size : &zero;
+   range[0] = 0;
+   range[1] = -1;
+
+   } else if (cm_prop_index == CM_GAMMA_LUT) {
+   format = 16;
+   range[0] = 0;
+   range[1] = (1 << 16) - 1; // Max 16 bit unsigned int.
+   if (crtc && crtc->gamma_lut) {
+   /* Convert from 8bit size to 16bit size */
+   length = sizeof(*crtc->gamma_lut) >> 1;
+   length *= crtc->gamma_lut_size;
+   data = crtc->gamma_lut;
+   } else {
+   length = 1;
+   data = &zero;
+   }
+   } else if (cm_prop_index == CM_DEGAMMA_LUT) {
+   format = 16;
+   range[0] = 0;
+   range[1] = (1 << 16) - 1; // Max 16 bit unsigned int.
+   if (crtc && crtc->degamma_lut) {
+   /* Convert from 8bit size to 16bit size */
+   length = sizeof(*crtc->degamma_lut) >> 1;
+   length *= crtc->degamma_lut_size;
+   data = crtc->degamma_lut;
+   } else {
+   length = 1;
+   data = &zero;
+   }
+   } else {
+   /* CTM is fixed-point S31.32 format. */
+   format = 32;
+   need_configure = FALSE;
+   if (crtc && crtc->ctm) {
+   /* Convert from 8bit size to 32bit size */
+   length = sizeof(*crtc->ctm) >> 2;
+   data = crtc->ctm;
+   } else {
+   length = 1;
+   data = &zero;
+   }
+   }
+
+   if (need_configure) {
+   err = RRConfigureOutputProperty(output->randr_output, atom,
+   FALSE, TRUE, FALSE, 2, range);
+   if (err) {
+   xf86DrvMsg(output->scrn->scrnIndex, X_ERROR,
+  "Configuring color management property %s 
failed with %d\n",
+  

[PATCH xf86-video-amdgpu 2/7] Initialize color properties on CRTC during CRTC init

2018-06-01 Thread sunpeng.li
From: "Leo (Sunpeng) Li" 

And destroy them on the CRTC destroy hook.

When initializing color management properties on the private
drmmode_crtc, we want to:

1. Obtain its degamma and regamma LUT sizes
2. Default its color transform matrix (CTM) to identity
3. Program hardware with default color management values (SRGB for
   de/regamma, identity for CTM)

It's possible that cm initialization fails due to a memory or DRM error,
in which case DDX support for color management will be disabled on this
CRTC.
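
For reference, a minimal sketch of step 2, assuming the CTM is stored as
DRM's struct drm_color_ctm (S31.32 fixed point, where 1.0 is 1ULL << 32):

    struct drm_color_ctm *ctm = drmmode_crtc->ctm;
    int i;

    for (i = 0; i < 9; i++)
            /* entries 0, 4 and 8 form the diagonal of the 3x3 matrix */
            ctm->matrix[i] = (i % 4 == 0) ? (1ULL << 32) : 0;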

Signed-off-by: Leo (Sunpeng) Li 
---
 src/drmmode_display.c | 189 +-
 src/drmmode_display.h |   8 +++
 2 files changed, 196 insertions(+), 1 deletion(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 36b22ad..de09361 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -768,6 +768,88 @@ static enum drmmode_cm_prop get_cm_enum_from_str(const char *prop_name)
return CM_INVALID_PROP;
 }
 
+/**
+ * Return TRUE if the given CRTC supports non-legacy color management. False
+ * otherwise.
+ */
+static Bool drmmode_crtc_cm_enabled(drmmode_crtc_private_ptr drmmode_crtc)
+{
+   return drmmode_crtc->gamma_lut_size > 0 &&
+  drmmode_crtc->degamma_lut_size > 0;
+}
+
+/**
+ * Push staged color management properties on the CRTC to DRM.
+ *
+ * @crtc: The CRTC containing staged properties
+ * @cm_prop_index: The color property to push
+ *
+ * Return 0 on success, X-defined error codes on failure.
+ */
+static int drmmode_crtc_push_cm_prop(xf86CrtcPtr crtc,
+enum drmmode_cm_prop cm_prop_index)
+{
+   drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
+   AMDGPUEntPtr pAMDGPUEnt = AMDGPUEntPriv(crtc->scrn);
+   uint32_t created_blob_id = 0;
+   uint32_t drm_prop_id;
+   size_t expected_bytes = 0;
+   void *blob_data = NULL;
+   int ret;
+
+   if (!drmmode_crtc_cm_enabled(drmmode_crtc))
+   return BadName;
+
+   if (cm_prop_index == CM_GAMMA_LUT) {
+   /* Calculate the expected size of value in bytes */
+   expected_bytes = sizeof(struct drm_color_lut) *
+   drmmode_crtc->gamma_lut_size;
+   blob_data = drmmode_crtc->gamma_lut;
+   } else if (cm_prop_index == CM_DEGAMMA_LUT) {
+   expected_bytes = sizeof(struct drm_color_lut) *
+   drmmode_crtc->degamma_lut_size;
+   blob_data = drmmode_crtc->degamma_lut;
+   } else if (cm_prop_index == CM_CTM) {
+   expected_bytes = sizeof(struct drm_color_ctm);
+   blob_data = drmmode_crtc->ctm;
+   } else
+   return BadName;
+
+   if (blob_data) {
+   ret = drmModeCreatePropertyBlob(pAMDGPUEnt->fd,
+   blob_data, expected_bytes,
+   &created_blob_id);
+   if (ret) {
+   xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR,
+  "Creating DRM blob failed with errno %d\n",
+  ret);
+   return BadRequest;
+   }
+   }
+
+   drm_prop_id = drmmode_crtc->drmmode->cm_prop_ids[cm_prop_index];
+   ret = drmModeObjectSetProperty(pAMDGPUEnt->fd,
+  drmmode_crtc->mode_crtc->crtc_id,
+  DRM_MODE_OBJECT_CRTC,
+  drm_prop_id,
+  (uint64_t)created_blob_id);
+
+   /* If successful, kernel will have a reference already. Safe to destroy
+* the blob either way.
+*/
+   if (blob_data)
+   drmModeDestroyPropertyBlob(pAMDGPUEnt->fd, created_blob_id);
+
+   if (ret) {
+   xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR,
+  "Setting DRM property blob failed with errno %d\n",
+  ret);
+   return BadRequest;
+   }
+
+   return Success;
+}
+
 static void
 drmmode_crtc_gamma_do_set(xf86CrtcPtr crtc, uint16_t *red, uint16_t *green,
  uint16_t *blue, int size)
@@ -1314,6 +1396,22 @@ static Bool drmmode_set_scanout_pixmap(xf86CrtcPtr crtc, PixmapPtr ppix)
return TRUE;
 }
 
+static void drmmode_crtc_destroy(xf86CrtcPtr crtc)
+{
+   drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
+
+   drmModeFreeCrtc(drmmode_crtc->mode_crtc);
+
+   /* Free LUTs and CTM */
+   free(drmmode_crtc->gamma_lut);
+   free(drmmode_crtc->degamma_lut);
+   free(drmmode_crtc->ctm);
+
+   free(drmmode_crtc);
+   crtc->driver_private = NULL;
+}
+
+
 static xf86CrtcFuncsRec drmmode_crtc_funcs = {
.dpms = drmmode_crtc_dpms,
.set_mode_major = drmmode_set_mode_major,
@@ -1330,7 +1428,7 @@ static 

[PATCH xf86-video-amdgpu 7/7] Also compose LUT when setting legacy gamma

2018-06-01 Thread sunpeng.li
From: "Leo (Sunpeng) Li" 

We compose the two LUTs when pushing non-legacy gamma changes, and the
same needs to be done when setting legacy gamma.

To do so, we just call push_cm_prop() on the gamma LUT. It will compose
the LUTs for us, and fall back to using legacy LUT (upscaled to non-
legacy size) if non-legacy is unavailable.

It's also possible that the CRTC has no support for non-legacy color, in
which case we fall back to legacy gamma.

Signed-off-by: Leo (Sunpeng) Li 
---
 src/drmmode_display.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index b4e1d57..d31f975 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -1240,9 +1240,21 @@ drmmode_crtc_gamma_do_set(xf86CrtcPtr crtc, uint16_t *red, uint16_t *green,
 {
drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
AMDGPUEntPtr pAMDGPUEnt = AMDGPUEntPriv(crtc->scrn);
+   int ret;
+
+   /* Use legacy if the CRTC does not support non-legacy gamma */
+   if (!drmmode_crtc_cm_enabled(drmmode_crtc)) {
+   drmModeCrtcSetGamma(pAMDGPUEnt->fd,
+   drmmode_crtc->mode_crtc->crtc_id,
+   size, red, green, blue);
+   return;
+   }
 
-   drmModeCrtcSetGamma(pAMDGPUEnt->fd, drmmode_crtc->mode_crtc->crtc_id,
-   size, red, green, blue);
+   ret = drmmode_crtc_push_cm_prop(crtc, CM_GAMMA_LUT);
+   if (ret)
+   xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR,
+  "Setting Gamma LUT failed with errno %d\n",
+  ret);
 }
 
 Bool
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu 6/7] Compose non-legacy with legacy regamma LUT

2018-06-01 Thread sunpeng.li
From: "Leo (Sunpeng) Li" 

Frequently, a user may have non-legacy gamma enabled for monitor
correction, while using legacy gamma for things like
redshift/nightlight.

To do so, we compose the two LUTs. Legacy gamma will be applied first,
then non-legacy. i.e. non-legacy_LUT(legacy_LUT(in_color)).
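
Ignoring the size mismatch that the interpolation below takes care of, the
composition itself is just an indexed lookup. A sketch for one color
channel, assuming both LUTs have length len and 16-bit entries:

    for (i = 0; i < len; i++) {
            /* legacy first: normalize a[i] (0..0xffff) to an index */
            uint32_t j = (uint32_t)a[i] * (len - 1) / 0xffff;
            /* then non-legacy */
            out[i] = b[j];
    }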

Note that the staged gamma LUT within the driver-private CRTC will
always contain the non-legacy LUT. This is to ensure that we have a
cached copy for future compositions.

Signed-off-by: Leo (Sunpeng) Li 
---
 src/drmmode_display.c | 177 +-
 1 file changed, 176 insertions(+), 1 deletion(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 6e5ae74..b4e1d57 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -788,6 +788,150 @@ static Bool drmmode_crtc_cm_enabled(drmmode_crtc_private_ptr drmmode_crtc)
 }
 
 /**
+ * If legacy LUT is a, and non-legacy LUT is b, then the result of b(a(x)) is
+ * returned in out_lut. out_lut's length is expected to be the same as the
+ * non-legacy LUT b.
+ *
+ * @a_(red|green|blue): The red, green, and blue components of the legacy LUT.
+ * @b_lut: The non-legacy LUT, in DRM's color LUT format.
+ * @out_lut: The composed LUT, in DRM's color LUT format.
+ * @len_a: Length of legacy lut.
+ * @len_b: Length of non-legacy lut.
+ */
+static void drmmode_lut_compose(uint16_t *a_red,
+   uint16_t *a_green,
+   uint16_t *a_blue,
+   struct drm_color_lut *b_lut,
+   struct drm_color_lut *out_lut,
+   uint32_t len_a, uint32_t len_b)
+{
+   uint32_t i_l, i_r, i;
+   uint32_t i_amax, i_bmax;
+   uint32_t coeff_ibmax;
+   uint32_t j;
+   uint64_t a_out_ibmax;
+   int color;
+   size_t struct_size = sizeof(struct drm_color_lut);
+
+   uint32_t max_lut = (1 << 16) - 1;
+
+   i_amax = len_a - 1;
+   i_bmax = len_b - 1;
+
+   /* A linear interpolation is done on the legacy LUT before it is
+* composed, to bring it up-to-size with the non-legacy LUT. The
+* interpolation uses integers by keeping things multiplied until the
+* last moment.
+*/
+   for (color = 0; color < 3; color++) {
+   uint16_t *a, *b, *out;
+
+   /* Set the initial pointers to the right color components. The
+* inner for-loop will then maintain the correct offset from
+* the initial element.
+*/
+   if (color == 0) {
+   a = a_red;
+   b = &b_lut[0].red;
+   out = &out_lut[0].red;
+   } else if (color == 1) {
+   a = a_green;
+   b = &b_lut[0].green;
+   out = &out_lut[0].green;
+   } else {
+   a = a_blue;
+   b = &b_lut[0].blue;
+   out = &out_lut[0].blue;
+   }
+
+   for (i = 0; i < len_b; i++) {
+   /* i_l and i_r tracks the left and right elements in
+* a_lut, to the sample point i. Also handle last
+* element edge case, when i_l = i_amax.
+*/
+   i_l = i * i_amax / i_bmax;
+   i_r = i_l + !!(i_amax - i_l);
+
+   /* coeff is intended to be in [0, 1), depending on
+* where sample i is between i_l and i_r. We keep it
+* multiplied with i_bmax throughout to maintain
+* precision */
+   coeff_ibmax = (i * i_amax) - (i_l * i_bmax);
+   a_out_ibmax = i_bmax * a[i_l] +
+ coeff_ibmax * (a[i_r] - a[i_l]);
+
+   /* j = floor((a_out/max_lut)*i_bmax).
+* i.e. the element in LUT b that a_out maps to. We
+* have to divide by max_lut to normalize a_out, since
+* values in the LUTs are [0, 1<<16)
+*/
+   j = a_out_ibmax / max_lut;
+   *(uint16_t*)((void*)out + (i*struct_size)) =
+   *(uint16_t*)((void*)b + (j*struct_size));
+   }
+   }
+
+   for (i = 0; i < len_b; i++)
+   out_lut[i].reserved = 0;
+}
+
+/**
+ * Resize a LUT, using linear interpolation.
+ *
+ * @in_(red|green|blue): Legacy LUT components
+ * @out_lut: The resized LUT is returned here, in DRM color LUT format.
+ * @len_in: Length of legacy LUT.
+ * @len_out: Length of out_lut, i.e. the target size.
+ */
+static void drmmode_lut_interpolate(uint16_t *in_red,
+   uint16_t *in_green,
+   uint16_t *in_blue,
+

[PATCH xf86-video-amdgpu 5/7] Enable setting of color properties via RandR

2018-06-01 Thread sunpeng.li
From: "Leo (Sunpeng) Li" 

Setting a color property involves:
1. Staging the property onto the driver-private CRTC object
2. Pushing the staged property into kernel DRM, for HW programming

Add a function to do the staging, and execute the above steps in
output_property_set.
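
In drmmode_output_set_property this boils down to the following two calls
(sketch):

    if (drmmode_crtc_stage_cm_prop(output->crtc, cm_prop_index, value))
            return FALSE;   /* parsing/staging failed */
    if (drmmode_crtc_push_cm_prop(output->crtc, cm_prop_index))
            return FALSE;   /* kernel rejected the value */
    return TRUE;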

Signed-off-by: Leo (Sunpeng) Li 
---
 src/drmmode_display.c | 97 +++
 1 file changed, 97 insertions(+)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 350dc3d..6e5ae74 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -905,6 +905,89 @@ static int rr_configure_and_change_cm_property(xf86OutputPtr output,
 }
 
 /**
+* Stage a color management property. This parses the property value, according
+* to the cm property type, then stores it within the driver-private CRTC
+* object.
+*
+* @crtc: The CRTC to stage the new color management properties in
+* @cm_prop_index: The color property to stage
+* @value: The RandR property value to stage
+*
+* Return 0 on success, X-defined error code on failure.
+*/
+static int drmmode_crtc_stage_cm_prop(xf86CrtcPtr crtc,
+ enum drmmode_cm_prop cm_prop_index,
+ RRPropertyValuePtr value)
+{
+   drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
+   size_t expected_bytes = 0;
+   void **blob_data = NULL;
+   Bool use_default = FALSE;
+
+   /* Early return if CRTC has no cm support */
+   if (!drmmode_crtc_cm_enabled(drmmode_crtc))
+   return BadName;
+
+   /* Update properties on the driver-private CRTC */
+   if (cm_prop_index == CM_GAMMA_LUT) {
+   /* Calculate the expected size of value in bytes */
+   expected_bytes = sizeof(struct drm_color_lut) *
+   drmmode_crtc->gamma_lut_size;
+
+   /* For gamma and degamma, we allow a default SRGB curve to be
+* set via setting a single element
+*
+* Otherwise, value size is in terms of the value format.
+* Ensure it's also in bytes (<< 1) before comparing with the
+* expected bytes.
+*/
+   if (value->size == 1)
+   use_default = TRUE;
+   else if (value->type != XA_INTEGER || value->format != 16 ||
+(size_t)(value->size << 1) != expected_bytes)
+   return BadLength;
+
+   blob_data = (void**)&drmmode_crtc->gamma_lut;
+
+   } else if (cm_prop_index == CM_DEGAMMA_LUT) {
+   expected_bytes = sizeof(struct drm_color_lut) *
+   drmmode_crtc->degamma_lut_size;
+
+   if (value->size == 1)
+   use_default = TRUE;
+   else if (value->type != XA_INTEGER || value->format != 16 ||
+(size_t)(value->size << 1) != expected_bytes)
+   return BadLength;
+
+   blob_data = (void**)&drmmode_crtc->degamma_lut;
+
+   } else if (cm_prop_index == CM_CTM) {
+   expected_bytes = sizeof(struct drm_color_ctm);
+
+   if (value->size == 1)
+   use_default = TRUE;
+   if (value->type != XA_INTEGER || value->format != 32 ||
+   (size_t)(value->size << 2) != expected_bytes)
+   return BadLength;
+
+   blob_data = (void**)&drmmode_crtc->ctm;
+
+   } else
+   return BadName;
+
+   free(*blob_data);
+   if (!use_default) {
+   *blob_data = malloc(expected_bytes);
+   if (!*blob_data)
+   return BadAlloc;
+   memcpy(*blob_data, value->data, expected_bytes);
+   } else
+   *blob_data = NULL;
+
+   return Success;
+}
+
+/**
  * Push staged color management properties on the CRTC to DRM.
  *
  * @crtc: The CRTC containing staged properties
@@ -2126,8 +2209,22 @@ drmmode_output_set_property(xf86OutputPtr output, Atom property,
 {
drmmode_output_private_ptr drmmode_output = output->driver_private;
AMDGPUEntPtr pAMDGPUEnt = AMDGPUEntPriv(output->scrn);
+   enum drmmode_cm_prop cm_prop_index;
int i;
 
+   cm_prop_index = get_cm_enum_from_str(NameForAtom(property));
+   if (cm_prop_index >= 0 && cm_prop_index != CM_GAMMA_LUT_SIZE &&
+   cm_prop_index != CM_DEGAMMA_LUT_SIZE) {
+   if (!output->crtc)
+   return FALSE;
+   if (drmmode_crtc_stage_cm_prop(output->crtc, cm_prop_index,
+  value))
+   return FALSE;
+   if (drmmode_crtc_push_cm_prop(output->crtc, cm_prop_index))
+   return FALSE;
+   return TRUE;
+   }
+
for (i = 0; i < drmmode_output->num_props; i++) {
drmmode_prop_ptr p = 

[PATCH xf86-video-amdgpu 4/7] Update color properties on output_get_property

2018-06-01 Thread sunpeng.li
From: "Leo (Sunpeng) Li" 

Notify RandR of any updated color properties on the output's CRTC when
its get_property() hook is called.

Signed-off-by: Leo (Sunpeng) Li 
---
 src/drmmode_display.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 32ac441..350dc3d 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -2184,6 +2184,30 @@ drmmode_output_set_property(xf86OutputPtr output, Atom property,
 
 static Bool drmmode_output_get_property(xf86OutputPtr output, Atom property)
 {
+   drmmode_crtc_private_ptr drmmode_crtc;
+   enum drmmode_cm_prop cm_prop_id;
+   int ret;
+
+   /* First, see if it's a cm property */
+   cm_prop_id = get_cm_enum_from_str(NameForAtom(property));
+   if (output->crtc && cm_prop_id != CM_INVALID_PROP) {
+   drmmode_crtc = output->crtc->driver_private;
+
+   if (!drmmode_crtc_cm_enabled(drmmode_crtc))
+   return TRUE;
+
+   ret = rr_configure_and_change_cm_property(output, drmmode_crtc,
+ cm_prop_id);
+   if (ret) {
+   xf86DrvMsg(output->scrn->scrnIndex, X_ERROR,
+  "Error getting color property: %d\n",
+  ret);
+   return FALSE;
+   }
+   return TRUE;
+   }
+
+   /* Otherwise, must be an output property. */
return TRUE;
 }
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu 1/7] Cache color property IDs during pre-init

2018-06-01 Thread sunpeng.li
From: "Leo (Sunpeng) Li" 

DRM creates property types with unique IDs during kernel driver init.
Cache the color property IDs on DDX init for use later, when we need
to modify these properties.

Since these IDs are the same regardless of the CRTC, they can be
cached within the private drmmode_rec object. We can also use any color-
management-enabled CRTC to initially fetch these IDs.

Also introduce an enumeration of possible color management properties,
to provide an easy and unified way of referring to them.
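
The enumeration mirrors the cm_prop_names[] table added below; a sketch of
its shape (the actual definition lives in drmmode_display.h, so values may
differ):

    enum drmmode_cm_prop {
            CM_GAMMA_LUT_SIZE,
            CM_DEGAMMA_LUT_SIZE,
            CM_GAMMA_LUT,
            CM_CTM,
            CM_DEGAMMA_LUT,
            CM_NUM_PROPS,
            CM_INVALID_PROP = -1,
    };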

Signed-off-by: Leo (Sunpeng) Li 
---
 src/drmmode_display.c | 90 +++
 src/drmmode_display.h | 19 +++
 2 files changed, 109 insertions(+)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 8a1a201..36b22ad 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -746,6 +746,28 @@ drmmode_crtc_scanout_update(xf86CrtcPtr crtc, DisplayModePtr mode,
}
 }
 
+static char *cm_prop_names[] = {
+   "GAMMA_LUT_SIZE",
+   "DEGAMMA_LUT_SIZE",
+   "GAMMA_LUT",
+   "CTM",
+   "DEGAMMA_LUT",
+};
+
+/**
+ * Return the enum of the color management property with the given name.
+ */
+static enum drmmode_cm_prop get_cm_enum_from_str(const char *prop_name)
+{
+   enum drmmode_cm_prop ret;
+
+   for (ret = 0; ret < CM_NUM_PROPS; ret++) {
+   if (!strcmp(prop_name, cm_prop_names[ret]))
+   return ret;
+   }
+   return CM_INVALID_PROP;
+}
+
 static void
 drmmode_crtc_gamma_do_set(xf86CrtcPtr crtc, uint16_t *red, uint16_t *green,
  uint16_t *blue, int size)
@@ -2413,6 +2435,72 @@ drmmode_page_flip_target_relative(AMDGPUEntPtr pAMDGPUEnt,
 drm_queue_seq);
 }
 
+/**
+ * Cache DRM color management property type IDs. This should be done during DDX
+ * initialization, as these IDs do not change. They will be used later to
+ * modify color management via DRM, or to determine if there's any CRTC(s) that
+ * support color management.
+ *
+ * If the cached IDs are all 0 after calling this function, then color
+ * management is not supported by any CRTC. In short, checking whether the gamma
+ * LUT size property ID == 0 is sufficient.
+ *
+ * @drm_fd: DRM file descriptor
+ * @drmmode: drmmode object, where the cached IDs are stored
+ * @mode_res: The DRM mode resource containing the CRTC ids
+ */
+static void drmmode_cache_cm_prop_ids(int drm_fd,
+ drmmode_ptr drmmode,
+ drmModeResPtr mode_res)
+{
+   drmModeObjectPropertiesPtr drm_props;
+   drmModePropertyPtr drm_prop;
+   enum drmmode_cm_prop cm_prop;
+   uint32_t cm_enabled = 0;
+   uint32_t cm_all_enabled = (1 << CM_NUM_PROPS) - 1;
+   int i, j;
+
+   memset(drmmode->cm_prop_ids, 0, sizeof(drmmode->cm_prop_ids));
+
+   /* If even one CRTC supports cm, then we cache the properties.
+* Otherwise, set them all to 0.
+*/
+   for (i = 0; i < drmmode->count_crtcs; i++) {
+
+   drm_props = drmModeObjectGetProperties(drm_fd,
+  mode_res->crtcs[i],
+  DRM_MODE_OBJECT_CRTC);
+   if (!drm_props)
+   continue;
+
+   for (j = 0; j < drm_props->count_props; j++) {
+   drm_prop = drmModeGetProperty(drm_fd,
+ drm_props->props[j]);
+   if (!drm_prop)
+   /* No guarantee that the property we failed to
+* get isn't a cm property. Skip to next CRTC.
+*/
+   break;
+
+   cm_prop = get_cm_enum_from_str(drm_prop->name);
+   if (cm_prop == CM_INVALID_PROP)
+   continue;
+
+   drmmode->cm_prop_ids[cm_prop] = drm_props->props[j];
+   cm_enabled |= 1 << cm_prop;
+
+   drmModeFreeProperty(drm_prop);
+   }
+   drmModeFreeObjectProperties(drm_props);
+
+   if (cm_enabled == cm_all_enabled)
+   break;
+
+   cm_enabled = 0;
+   memset(drmmode->cm_prop_ids, 0, sizeof(drmmode->cm_prop_ids));
+   }
+}
+
 Bool drmmode_pre_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int cpp)
 {
AMDGPUEntPtr pAMDGPUEnt = AMDGPUEntPriv(pScrn);
@@ -2459,6 +2547,8 @@ Bool drmmode_pre_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int cpp)
if (pScrn->depth == 30)
info->drmmode_crtc_funcs.gamma_set = NULL;
 
+   drmmode_cache_cm_prop_ids(pAMDGPUEnt->fd, drmmode, mode_res);
+
for (i = 0; i < mode_res->count_crtcs; i++)
if (!xf86IsEntityShared(pScrn->entityList[0]) ||
 

[PATCH xf86-video-amdgpu 0/7] Enabling Color Management - Round 3

2018-06-01 Thread sunpeng.li
From: "Leo (Sunpeng) Li" 

This ended up being different enough from v2 to warrant a new patchset. Per
Michel's suggestions, there have been various optimizations and cleanups.
Here's what's changed:

* Cache DRM color management property IDs at pre-init,
  instead of querying DRM each time we need to modify color properties.

* Remove drmmode_update_cm_props().
  * Update color properties in drmmode_output_get_property() instead.
  * This also makes old calls to update_cm_props redundant.

* Get rid of fake CRTCs.
  * Previously, we were allocating a fake CRTC to configure color props on
    outputs that don't have a CRTC.
  * Instead, rr_configure_and_change_cm_property() can be easily modified to
    accept NULL CRTCs.

* Drop patches to persist color properties across DPMS events.
  * Kernel driver should be patched instead:
    https://lists.freedesktop.org/archives/amd-gfx/2018-May/022744.html
  * Color props including legacy gamma now persist across crtc dpms.
  * Non-legacy props now persist across output dpms and hotplug, as long
    as the same CRTC remains attached to that output.

And some smaller improvements:

* Change CTM to be 32-bit format instead of 16-bit.
  * This requires clients to ensure that each 32-bit element is padded to be
    long-sized, since libXrandr parses 32-bit format as long-typed.

* Optimized color management init during CRTC init.
  * Query DRM once for the list of properties, instead of twice.


The test/demo application is also updated to reflect that 32-bit CTM change:
https://cgit.freedesktop.org/~hwentland/color-demo-app
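
For illustration, the padding a client has to do when setting the 32-bit
CTM through libXrandr might look like this (a hypothetical sketch; it
assumes the low 32-bit half of each S31.32 entry comes first):

    uint64_t ctm[9];    /* S31.32 fixed-point matrix */
    long data[18];      /* libXrandr passes format-32 values as longs */
    int i;

    for (i = 0; i < 9; i++) {
            data[i * 2]     = (long)(uint32_t)(ctm[i] & 0xffffffff);
            data[i * 2 + 1] = (long)(uint32_t)(ctm[i] >> 32);
    }
    XRRChangeOutputProperty(dpy, output, ctm_atom, XA_INTEGER, 32,
                            PropModeReplace, (unsigned char *)data, 18);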

Leo (Sunpeng) Li (7):
  Cache color property IDs during pre-init
  Initialize color properties on CRTC during CRTC init
  Configure color properties when creating output resources
  Update color properties on output_get_property
  Enable setting of color properties via RandR
  Compose non-legacy with legacy regamma LUT
  Also compose LUT when setting legacy gamma

 src/drmmode_display.c | 739 +-
 src/drmmode_display.h |  27 ++
 2 files changed, 763 insertions(+), 3 deletions(-)

-- 
2.7.4



Re: [PATCH 01/18] drm/amdgpu: define vcn jpeg ring

2018-06-01 Thread Boyuan Zhang



On 2018-06-01 04:36 AM, Christian König wrote:
Patches #1 - #8 and patches #11-#18 are Reviewed-by: Christian König.


Patch #9:

static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, 
uint32_t ptr, uint32_t reg_offset, uint32_t val)

That you don't pass ptr by reference here looks like a bug to me.


Sorry for the confusion, I shouldn't call it "ptr" because this is
actually an index into ring->ring[]. I will rename it to something like "idx".




Patch #10:

+    .extra_dw = 0,
I think we should either drop that or add it to all the other rings as 
well. I certainly prefer to just drop it, cause it's less maintenance 
work.


Thanks for the review. I will drop it accordingly.

Regards,
Boyuan



Apart from that the patch is Reviewed-by: Christian König as well.


Thanks,
Christian.

Am 30.05.2018 um 22:27 schrieb boyuan.zh...@amd.com:

From: Boyuan Zhang 

Add AMDGPU_RING_TYPE_VCN_JPEG ring define

Signed-off-by: Boyuan Zhang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

index 1513124c..a3908ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -53,7 +53,8 @@ enum amdgpu_ring_type {
  AMDGPU_RING_TYPE_KIQ,
  AMDGPU_RING_TYPE_UVD_ENC,
  AMDGPU_RING_TYPE_VCN_DEC,
-    AMDGPU_RING_TYPE_VCN_ENC
+    AMDGPU_RING_TYPE_VCN_ENC,
+    AMDGPU_RING_TYPE_VCN_JPEG
  };
    struct amdgpu_device;






Re: [PATCH 1/5] dma_buf: remove device parameter from attach callback

2018-06-01 Thread Michel Dänzer
On 2018-06-01 05:17 PM, Christian König wrote:
> Am 01.06.2018 um 16:02 schrieb Michel Dänzer:
>> On 2018-06-01 02:11 PM, Christian König wrote:
>>> Sorry, accidentally sent this series without a cover letter.
>>>
>>> This is a cleanup to the DMA-buf interface, which is also a prerequisite
>>> to unpinned DMA-buf operation.
>>>
>>> Patch #1 and #2 just remove unused functionality and clean up callback
>>> parameters.
>>>
>>> Patch #3 and #4 introduce taking the reservation lock during
>>> mapping/unmapping of DMA-bufs.
>>>
>>> This introduces a common lock which both exporter and importer
>>> can then use in the future for unpinned DMA-buf operation.
>>>
>>> This of course means that exporters should now not take this reservation
>>> lock manually any more. The DRM drivers don't seem to actually do that,
>>> but I'm not 100% sure about other implementations.
>>>
>>> Patch #5 then makes use of the new lock to simplify the DMA-buf import
>>> handling in amdgpu.
>> Please rebase this series on top of
>> https://patchwork.freedesktop.org/patch/226311/ and update the
>> documentation in amdgpu_prime.c as needed in each patch.
> 
> Sure. In this case can we get your patches committed to
> amd-staging-drm-next ASAP?

Sure, done.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer


Re: [PATCH 1/5] dma_buf: remove device parameter from attach callback

2018-06-01 Thread Christian König

Am 01.06.2018 um 16:02 schrieb Michel Dänzer:

On 2018-06-01 02:11 PM, Christian König wrote:

Sorry, accidentally sent this series without a cover letter.

This is a cleanup to the DMA-buf interface, which is also a prerequisite
to unpinned DMA-buf operation.

Patch #1 and #2 just remove unused functionality and clean up callback
parameters.

Patch #3 and #4 introduce taking the reservation lock during
mapping/unmapping of DMA-bufs.

This introduces a common lock which both exporter and importer
can then use in the future for unpinned DMA-buf operation.

This of course means that exporters should now not take this reservation
lock manually any more. The DRM drivers don't seem to actually do that,
but I'm not 100% sure about other implementations.

Patch #5 then makes use of the new lock to simplify the DMA-buf import
handling in amdgpu.

Please rebase this series on top of
https://patchwork.freedesktop.org/patch/226311/ and update the
documentation in amdgpu_prime.c as needed in each patch.


Sure. In this case can we get your patches committed to 
amd-staging-drm-next ASAP?


Thanks,
Christian.


Re: [PATCH 1/3] Revert "drm/amdgpu: Add an ATPX quirk for hybrid laptop"

2018-06-01 Thread Alex Deucher
On Thu, May 31, 2018 at 11:07 PM, Huang Rui  wrote:
> On Thu, May 24, 2018 at 02:46:34PM -0500, Alex Deucher wrote:
>> This reverts commit 13b40935cf64f59b93cf1c716a2033488e5a228c.
>>
>> This was a workaround for a bug in the HDA driver that prevented
>> the HDA audio chip from going into runtime pm which prevented
>> the GPU from going into runtime pm.
>>
>> Bug: https://bugs.freedesktop.org/show_bug.cgi?id=106597
>> Signed-off-by: Alex Deucher 
>
> For series are,
> Reviewed-by: Huang Rui 

Just a heads up, these patches shouldn't go into the dkms branches
until distros pick up this patch:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=009f8c90f571d87855914dbc20e6c0ea2a3b19ae

Alex


>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 1 -
>>  1 file changed, 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
>> index 1bcb2b247335..daa06e7c5bb7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
>> @@ -569,7 +569,6 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
>>   { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
>>   { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
>>   { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
>> - { 0x1002, 0x67DF, 0x1028, 0x0774, AMDGPU_PX_QUIRK_FORCE_ATPX },
>>   { 0, 0, 0, 0, 0 },
>>  };
>>
>> --
>> 2.13.6
>>


Re: [PATCH 3/3] drm/amdgpu: Add documentation for PRIME related code

2018-06-01 Thread Alex Deucher
On Fri, Jun 1, 2018 at 9:56 AM, Michel Dänzer  wrote:
> On 2018-06-01 03:44 PM, Alex Deucher wrote:
>> On Fri, Jun 1, 2018 at 9:40 AM, Michel Dänzer  wrote:
>>> On 2018-06-01 02:58 PM, Alex Deucher wrote:
 On Thu, May 31, 2018 at 12:17 PM, Michel Dänzer  wrote:
> From: Michel Dänzer 
>
> Signed-off-by: Michel Dänzer 

 Series is:
 Reviewed-by: Alex Deucher 
>>>
>>> Thanks. Is it okay to merge all of these via the amdgpu tree, or should
>>> I wait for an ack from Jon and/or core DRM maintainers for that?
>>
>> GPU documentation usually goes through the drm trees.  You might want
>> to push patch 1 (drm_mm.rst fix) via drm_misc, but the rest can go
>> in via the amdgpu tree since they are amdgpu specific.
>
> I'd prefer merging all three patches together, since without patch 1,
> patch 3 generates the warning below while generating documentation, and
> the reference to the PRIME Buffer Sharing section doesn't work.
>
> .../linux/Documentation/gpu/amdgpu.rst:2: WARNING: undefined label:
> prime_buffer_sharing (if the link has no caption the label must precede
> a section header)


That's fine.  I'll take them all through the amdgpu tree.

Alex

>
>
> --
> Earthling Michel Dänzer   |   http://www.amd.com
> Libre software enthusiast | Mesa and X developer


Re: [PATCH 1/5] dma_buf: remove device parameter from attach callback

2018-06-01 Thread Michel Dänzer
On 2018-06-01 02:11 PM, Christian König wrote:
> Sorry, accidentally sent this series without a cover letter.
> 
> This is a cleanup to the DMA-buf interface, which is also a prerequisite
> to unpinned DMA-buf operation.
> 
> Patch #1 and #2 just remove unused functionality and clean up callback
> parameters.
> 
> Patch #3 and #4 introduce taking the reservation lock during
> mapping/unmapping of DMA-bufs.
> 
> This introduces a common lock which both exporter and importer
> can then use in the future for unpinned DMA-buf operation.
> 
> This of course means that exporters should now not take this reservation
> lock manually any more. The DRM drivers don't seem to actually do that,
> but I'm not 100% sure about other implementations.
> 
> Patch #5 then makes use of the new lock to simplify the DMA-buf import
> handling in amdgpu.

Please rebase this series on top of
https://patchwork.freedesktop.org/patch/226311/ and update the
documentation in amdgpu_prime.c as needed in each patch.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer


Re: [PATCH 3/3] drm/amdgpu: Add documentation for PRIME related code

2018-06-01 Thread Michel Dänzer
On 2018-06-01 03:44 PM, Alex Deucher wrote:
> On Fri, Jun 1, 2018 at 9:40 AM, Michel Dänzer  wrote:
>> On 2018-06-01 02:58 PM, Alex Deucher wrote:
>>> On Thu, May 31, 2018 at 12:17 PM, Michel Dänzer  wrote:
 From: Michel Dänzer 

 Signed-off-by: Michel Dänzer 
>>>
>>> Series is:
>>> Reviewed-by: Alex Deucher 
>>
>> Thanks. Is it okay to merge all of these via the amdgpu tree, or should
>> I wait for an ack from Jon and/or core DRM maintainers for that?
> 
> GPU documentation usually goes through the drm trees.  You might want
> to push patch 1 (drm_mm.rst fix) via drm_misc, but the rest can go
> in via the amdgpu tree since they are amdgpu specific.

I'd prefer merging all three patches together, since without patch 1,
patch 3 generates the warning below while generating documentation, and
the reference to the PRIME Buffer Sharing section doesn't work.

.../linux/Documentation/gpu/amdgpu.rst:2: WARNING: undefined label:
prime_buffer_sharing (if the link has no caption the label must precede
a section header)


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer


Re: [PATCH 3/3] drm/amdgpu: Add documentation for PRIME related code

2018-06-01 Thread Alex Deucher
On Fri, Jun 1, 2018 at 9:40 AM, Michel Dänzer  wrote:
> On 2018-06-01 02:58 PM, Alex Deucher wrote:
>> On Thu, May 31, 2018 at 12:17 PM, Michel Dänzer  wrote:
>>> From: Michel Dänzer 
>>>
>>> Signed-off-by: Michel Dänzer 
>>
>> Series is:
>> Reviewed-by: Alex Deucher 
>
> Thanks. Is it okay to merge all of these via the amdgpu tree, or should
> I wait for an ack from Jon and/or core DRM maintainers for that?

GPU documentation usually goes through the drm trees.  You might want
to push patch 1 (drm_mm.rst fix) via drm_misc, but the rest can go
in via the amdgpu tree since they are amdgpu specific.

Alex


Re: [PATCH 3/3] drm/amdgpu: Add documentation for PRIME related code

2018-06-01 Thread Michel Dänzer
On 2018-06-01 02:58 PM, Alex Deucher wrote:
> On Thu, May 31, 2018 at 12:17 PM, Michel Dänzer  wrote:
>> From: Michel Dänzer 
>>
>> Signed-off-by: Michel Dänzer 
> 
> Series is:
> Reviewed-by: Alex Deucher 

Thanks. Is it okay to merge all of these via the amdgpu tree, or should
I wait for an ack from Jon and/or core DRM maintainers for that?


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer


Re: [PATCH 3/3] drm/amdgpu: Hook up amdgpu_object.c documentation

2018-06-01 Thread Alex Deucher
On Fri, Jun 1, 2018 at 6:54 AM, Michel Dänzer  wrote:
> From: Michel Dänzer 
>
> Signed-off-by: Michel Dänzer 

Series is:
Reviewed-by: Alex Deucher 

> ---
>  Documentation/gpu/amdgpu.rst | 9 +
>  1 file changed, 9 insertions(+)
>
> diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
> index ad3711fd2a28..1fbf3876a3d8 100644
> --- a/Documentation/gpu/amdgpu.rst
> +++ b/Documentation/gpu/amdgpu.rst
> @@ -18,6 +18,15 @@ Memory Domains
>  .. kernel-doc:: include/uapi/drm/amdgpu_drm.h
> :doc: memory domains
>
> +Buffer Objects
> +--
> +
> +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +   :doc: amdgpu_object
> +
> +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +   :internal:
> +
>  PRIME Buffer Sharing
>  
>
> --
> 2.17.0
>


Re: [PATCH 3/3] drm/amdgpu: Add documentation for PRIME related code

2018-06-01 Thread Alex Deucher
On Thu, May 31, 2018 at 12:17 PM, Michel Dänzer  wrote:
> From: Michel Dänzer 
>
> Signed-off-by: Michel Dänzer 

Series is:
Reviewed-by: Alex Deucher 

> ---
>  Documentation/gpu/amdgpu.rst  |  14 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 119 ++
>  2 files changed, 133 insertions(+)
>
> diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
> index 41a14e4aa4ac..f557866f6788 100644
> --- a/Documentation/gpu/amdgpu.rst
> +++ b/Documentation/gpu/amdgpu.rst
> @@ -4,3 +4,17 @@
>
>  The drm/amdgpu driver supports all AMD Radeon GPUs based on the Graphics Core
>  Next (GCN) architecture.
> +
> +Core Driver Infrastructure
> +==
> +
> +This section covers core driver infrastructure.
> +
> +PRIME Buffer Sharing
> +
> +
> +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> +   :doc: PRIME Buffer Sharing
> +
> +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> +   :internal:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> index 4683626b065f..d1f05489595b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> @@ -23,6 +23,14 @@
>   *
>   * Authors: Alex Deucher
>   */
> +
> +/**
> + * DOC: PRIME Buffer Sharing
> + *
> + * The following callback implementations are used for :ref:`sharing GEM buffer
> + * objects between different devices via PRIME <prime_buffer_sharing>`.
> + */
> +
>  #include 
>
>  #include "amdgpu.h"
> @@ -32,6 +40,14 @@
>
>  static const struct dma_buf_ops amdgpu_dmabuf_ops;
>
> +/**
> + * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
> + * implementation
> + * @obj: GEM buffer object
> + *
> + * Returns:
> + * A scatter/gather table for the pinned pages of the buffer object's memory.
> + */
>  struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
>  {
> struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> @@ -40,6 +56,15 @@ struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
> return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);
>  }
>
> +/**
> + * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
> + * @obj: GEM buffer object
> + *
> + * Sets up an in-kernel virtual mapping of the buffer object's memory.
> + *
> + * Returns:
> + * The virtual address of the mapping or an error pointer.
> + */
>  void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)
>  {
> struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> @@ -53,6 +78,13 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)
> return bo->dma_buf_vmap.virtual;
>  }
>
> +/**
> + * amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation
> + * @obj: GEM buffer object
> + * @vaddr: virtual address (unused)
> + *
> + * Tears down the in-kernel virtual mapping of the buffer object's memory.
> + */
>  void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
>  {
> struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> @@ -60,6 +92,17 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
> ttm_bo_kunmap(&bo->dma_buf_vmap);
>  }
>
> +/**
> + * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation
> + * @obj: GEM buffer object
> + * @vma: virtual memory area
> + *
> + * Sets up a userspace mapping of the buffer object's memory in the given
> + * virtual memory area.
> + *
> + * Returns:
> + * 0 on success or negative error code.
> + */
>  int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
>  {
> struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> @@ -94,6 +137,19 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma
> return ret;
>  }
>
> +/**
> + * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
> + * implementation
> + * @dev: DRM device
> + * @attach: DMA-buf attachment
> + * @sg: Scatter/gather table
> + *
> + * Import shared DMA buffer memory exported by another device.
> + *
> + * Returns:
> + * A new GEM buffer object of the given DRM device, representing the memory
> + * described by the given DMA-buf attachment and scatter/gather table.
> + */
>  struct drm_gem_object *
>  amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
>  struct dma_buf_attachment *attach,
> @@ -132,6 +188,19 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
> return ERR_PTR(ret);
>  }
>
> +/**
> + * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
> + * @dma_buf: shared DMA buffer
> + * @target_dev: target device
> + * @attach: DMA-buf attachment
> + *
> + * Makes sure that the shared DMA buffer can be accessed by the target device.
> + * For now, simply pins it to the GTT domain, where it should be accessible by
> + * all DMA devices.
> + *
> + * Returns:
> + * 0 on success or negative error code.
> + */
>  static int amdgpu_gem_map_attach(struct 

[PATCH 5/5] drm/amdgpu: add independent DMA-buf export v3

2018-06-01 Thread Christian König
The caching of SGTs done by the DRM code is actually quite harmful and
should probably be removed altogether in the long term.

Start by providing a separate DMA-buf export implementation in amdgpu. This is
also a prerequisite of unpinned DMA-buf handling.

v2: fix unintended recursion, remove debugging leftovers
v3: split out from unpinned DMA-buf work

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 73 ++-
 3 files changed, 32 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2d7500921c0b..93dc57d74fc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -373,7 +373,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
 void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_file *file_priv);
 unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
 struct drm_gem_object *
 amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 struct dma_buf_attachment *attach,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b0bf2f24da48..270b8ad927ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -907,7 +907,6 @@ static struct drm_driver kms_driver = {
.gem_prime_export = amdgpu_gem_prime_export,
.gem_prime_import = amdgpu_gem_prime_import,
.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
-   .gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table,
.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
.gem_prime_vmap = amdgpu_gem_prime_vmap,
.gem_prime_vunmap = amdgpu_gem_prime_vunmap,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index a156b3891a3f..0c5a75b06648 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -32,14 +32,6 @@
 
 static const struct dma_buf_ops amdgpu_dmabuf_ops;
 
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
-{
-   struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-   int npages = bo->tbo.num_pages;
-
-   return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);
-}
-
 void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)
 {
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -132,23 +124,17 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
return ERR_PTR(ret);
 }
 
-static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
-struct dma_buf_attachment *attach)
+static struct sg_table *
+amdgpu_gem_map_dma_buf(struct dma_buf_attachment *attach,
+  enum dma_data_direction dir)
 {
+   struct dma_buf *dma_buf = attach->dmabuf;
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+   struct sg_table *sgt;
long r;
 
-   r = drm_gem_map_attach(dma_buf, attach);
-   if (r)
-   return r;
-
-   r = amdgpu_bo_reserve(bo, false);
-   if (unlikely(r != 0))
-   goto error_detach;
-
-
if (attach->dev->driver != adev->dev->driver) {
/*
 * Wait for all shared fences to complete before we switch to future
@@ -159,46 +145,53 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
MAX_SCHEDULE_TIMEOUT);
if (unlikely(r < 0)) {
DRM_DEBUG_PRIME("Fence wait failed: %li\n", r);
-   goto error_unreserve;
+   return ERR_PTR(r);
}
}
 
/* pin buffer into GTT */
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
if (r)
-   goto error_unreserve;
+   return ERR_PTR(r);
+
+   sgt = drm_prime_pages_to_sg(bo->tbo.ttm->pages, bo->tbo.num_pages);
+   if (IS_ERR(sgt))
+   return sgt;
+
+   if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+ DMA_ATTR_SKIP_CPU_SYNC))
+   goto error_free;
 
if (attach->dev->driver != adev->dev->driver)
bo->prime_shared_count++;
 
-error_unreserve:
-   amdgpu_bo_unreserve(bo);
+   return sgt;
 
-error_detach:
-   if (r)
-   drm_gem_map_detach(dma_buf, attach);
-   return r;
+error_free:
+   sg_free_table(sgt);
+   kfree(sgt);
+   return ERR_PTR(-ENOMEM);
 }
 
-static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
-   

[PATCH 3/5] dma-buf: lock the reservation object during (un)map_dma_buf

2018-06-01 Thread Christian König
First step towards unpinned DMA buf operation.

I've checked the DRM drivers for potential locking of the reservation
object, but essentially we need to audit all implementations of the
dma_buf_ops for this to work.
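
To make that concrete: with this patch the map_dma_buf() callback runs
with dmabuf->resv already held, so an exporter written like the following
sketch would now deadlock and has to be fixed up:

    static struct sg_table *my_map_dma_buf(struct dma_buf_attachment *attach,
                                           enum dma_data_direction dir)
    {
            /* WRONG after this patch: the dma-buf core already holds
             * attach->dmabuf->resv when it calls map_dma_buf(), so
             * taking the lock again here deadlocks.
             */
            reservation_object_lock(attach->dmabuf->resv, NULL);

            return NULL; /* sketch only */
    }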

Signed-off-by: Christian König 
---
 drivers/dma-buf/dma-buf.c | 4 
 include/linux/dma-buf.h   | 4 
 2 files changed, 8 insertions(+)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index e4c657d9fad7..4f0708cb58a7 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -631,7 +631,9 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
if (WARN_ON(!attach || !attach->dmabuf))
return ERR_PTR(-EINVAL);
 
+   reservation_object_lock(attach->dmabuf->resv, NULL);
sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
+   reservation_object_unlock(attach->dmabuf->resv);
if (!sg_table)
sg_table = ERR_PTR(-ENOMEM);
 
@@ -658,8 +660,10 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
return;
 
+   reservation_object_lock(attach->dmabuf->resv, NULL);
attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
direction);
+   reservation_object_unlock(attach->dmabuf->resv);
 }
 EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
 
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index d17cadd76802..d2ba7a027a78 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -118,6 +118,8 @@ struct dma_buf_ops {
 * any other kind of sharing that the exporter might wish to make
 * available to buffer-users.
 *
+* This is called with the dmabuf->resv object locked.
+*
 * Returns:
 *
 * A &sg_table scatter list of or the backing storage of the DMA buffer,
@@ -138,6 +140,8 @@ struct dma_buf_ops {
 * It should also unpin the backing storage if this is the last mapping
 * of the DMA buffer, it the exporter supports backing storage
 * migration.
+*
+* This is called with the dmabuf->resv object locked.
 */
void (*unmap_dma_buf)(struct dma_buf_attachment *,
  struct sg_table *,
-- 
2.14.1



[PATCH 4/5] dma-buf: add dma_buf_(un)map_attachment_locked variants

2018-06-01 Thread Christian König
Add function variants which can be called with the reservation lock
already held.
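
A minimal usage sketch for an importer that already holds the reservation
lock:

    struct sg_table *sgt;

    reservation_object_lock(attach->dmabuf->resv, NULL);

    sgt = dma_buf_map_attachment_locked(attach, DMA_BIDIRECTIONAL);
    if (!IS_ERR(sgt)) {
            /* ... access the buffer through sgt ... */
            dma_buf_unmap_attachment_locked(attach, sgt, DMA_BIDIRECTIONAL);
    }

    reservation_object_unlock(attach->dmabuf->resv);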

Signed-off-by: Christian König 
---
 drivers/dma-buf/dma-buf.c | 60 ++-
 include/linux/dma-buf.h   |  5 
 2 files changed, 59 insertions(+), 6 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 4f0708cb58a7..3371509b468e 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -606,6 +606,38 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
 }
 EXPORT_SYMBOL_GPL(dma_buf_detach);
 
+/**
+ * dma_buf_map_attachment_locked - Returns the scatterlist table of the
+ * attachment mapped into _device_ address space, with the reservation lock
+ * held. Is a wrapper for map_dma_buf() of the dma_buf_ops.
+ * @attach:    [in]    attachment whose scatterlist is to be returned
+ * @direction: [in]    direction of DMA transfer
+ *
+ * Returns sg_table containing the scatterlist to be returned; returns ERR_PTR
+ * on error. May return -EINTR if it is interrupted by a signal.
+ *
+ * A mapping must be unmapped by using dma_buf_unmap_attachment(). Note that
+ * the underlying backing storage is pinned for as long as a mapping exists,
+ * therefore users/importers should not hold onto a mapping for undue amounts of
+ * time.
+ */
+struct sg_table *
+dma_buf_map_attachment_locked(struct dma_buf_attachment *attach,
+ enum dma_data_direction direction)
+{
+   struct sg_table *sg_table;
+
+   might_sleep();
+   sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
+   if (!sg_table)
+   sg_table = ERR_PTR(-ENOMEM);
+
+   return sg_table;
+}
+EXPORT_SYMBOL_GPL(dma_buf_map_attachment_locked);
+
 /**
  * dma_buf_map_attachment - Returns the scatterlist table of the attachment;
  * mapped into _device_ address space. Is a wrapper for map_dma_buf() of the
@@ -626,13 +658,12 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 {
struct sg_table *sg_table;
 
-   might_sleep();
 
if (WARN_ON(!attach || !attach->dmabuf))
return ERR_PTR(-EINVAL);
 
reservation_object_lock(attach->dmabuf->resv, NULL);
-   sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
+   sg_table = dma_buf_map_attachment_locked(attach, direction);
reservation_object_unlock(attach->dmabuf->resv);
if (!sg_table)
sg_table = ERR_PTR(-ENOMEM);
@@ -641,6 +672,26 @@ struct sg_table *dma_buf_map_attachment(struct 
dma_buf_attachment *attach,
 }
 EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
 
+/**
+ * dma_buf_unmap_attachment_locked - unmaps the buffer with reservation lock
+ * held, should deallocate the associated scatterlist. Is a wrapper for
+ * unmap_dma_buf() of dma_buf_ops.
+ * @attach:    [in]    attachment to unmap buffer from
+ * @sg_table:  [in]    scatterlist info of the buffer to unmap
+ * @direction: [in]    direction of DMA transfer
+ *
+ * This unmaps a DMA mapping for @attach obtained by dma_buf_map_attachment().
+ */
+void dma_buf_unmap_attachment_locked(struct dma_buf_attachment *attach,
+struct sg_table *sg_table,
+enum dma_data_direction direction)
+{
+   might_sleep();
+   attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
+   direction);
+}
+EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment_locked);
+
 /**
  * dma_buf_unmap_attachment - unmaps and decreases usecount of the buffer;might
  * deallocate the scatterlist associated. Is a wrapper for unmap_dma_buf() of
@@ -655,14 +706,11 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
struct sg_table *sg_table,
enum dma_data_direction direction)
 {
-   might_sleep();
-
if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
return;
 
reservation_object_lock(attach->dmabuf->resv, NULL);
-   attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
-   direction);
+   dma_buf_unmap_attachment_locked(attach, sg_table, direction);
reservation_object_unlock(attach->dmabuf->resv);
 }
 EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index d2ba7a027a78..968777e8c662 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -388,8 +388,13 @@ int dma_buf_fd(struct dma_buf *dmabuf, int flags);
 struct dma_buf *dma_buf_get(int fd);
 void dma_buf_put(struct dma_buf *dmabuf);
 
+struct sg_table *dma_buf_map_attachment_locked(struct dma_buf_attachment *,
+  enum dma_data_direction);
 struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
enum 

[PATCH 2/5] dma-buf: remove kmap_atomic interface

2018-06-01 Thread Christian König
Neither used nor correctly implemented anywhere. Just completely remove
the interface.

Signed-off-by: Christian König 
---
 drivers/dma-buf/dma-buf.c  | 44 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c  |  2 -
 drivers/gpu/drm/armada/armada_gem.c|  2 -
 drivers/gpu/drm/drm_prime.c| 26 -
 drivers/gpu/drm/i915/i915_gem_dmabuf.c | 11 --
 drivers/gpu/drm/i915/selftests/mock_dmabuf.c   |  2 -
 drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c  |  2 -
 drivers/gpu/drm/tegra/gem.c| 14 ---
 drivers/gpu/drm/udl/udl_dmabuf.c   | 17 -
 drivers/gpu/drm/vmwgfx/vmwgfx_prime.c  | 13 ---
 .../media/common/videobuf2/videobuf2-dma-contig.c  |  1 -
 drivers/media/common/videobuf2/videobuf2-dma-sg.c  |  1 -
 drivers/media/common/videobuf2/videobuf2-vmalloc.c |  1 -
 drivers/staging/android/ion/ion.c  |  2 -
 drivers/tee/tee_shm.c  |  6 ---
 include/drm/drm_prime.h|  4 --
 include/linux/dma-buf.h|  4 --
 17 files changed, 152 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index e99a8d19991b..e4c657d9fad7 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -405,7 +405,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
  || !exp_info->ops->map_dma_buf
  || !exp_info->ops->unmap_dma_buf
  || !exp_info->ops->release
- || !exp_info->ops->map_atomic
  || !exp_info->ops->map
  || !exp_info->ops->mmap)) {
return ERR_PTR(-EINVAL);
@@ -687,14 +686,6 @@ EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
  *  void \*dma_buf_kmap(struct dma_buf \*, unsigned long);
  *  void dma_buf_kunmap(struct dma_buf \*, unsigned long, void \*);
  *
- *   There are also atomic variants of these interfaces. Like for kmap they
- *   facilitate non-blocking fast-paths. Neither the importer nor the exporter
- *   (in the callback) is allowed to block when using these.
- *
- *   Interfaces::
- *  void \*dma_buf_kmap_atomic(struct dma_buf \*, unsigned long);
- *  void dma_buf_kunmap_atomic(struct dma_buf \*, unsigned long, void \*);
- *
  *   For importers all the restrictions of using kmap apply, like the limited
  *   supply of kmap_atomic slots. Hence an importer shall only hold onto at
  *   max 2 atomic dma_buf kmaps at the same time (in any given process 
context).
@@ -859,41 +850,6 @@ int dma_buf_end_cpu_access(struct dma_buf *dmabuf,
 }
 EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access);
 
-/**
- * dma_buf_kmap_atomic - Map a page of the buffer object into kernel address
- * space. The same restrictions as for kmap_atomic and friends apply.
- * @dmabuf:[in]buffer to map page from.
- * @page_num:  [in]page in PAGE_SIZE units to map.
- *
- * This call must always succeed, any necessary preparations that might fail
- * need to be done in begin_cpu_access.
- */
-void *dma_buf_kmap_atomic(struct dma_buf *dmabuf, unsigned long page_num)
-{
-   WARN_ON(!dmabuf);
-
-   return dmabuf->ops->map_atomic(dmabuf, page_num);
-}
-EXPORT_SYMBOL_GPL(dma_buf_kmap_atomic);
-
-/**
- * dma_buf_kunmap_atomic - Unmap a page obtained by dma_buf_kmap_atomic.
- * @dmabuf:[in]buffer to unmap page from.
- * @page_num:  [in]page in PAGE_SIZE units to unmap.
- * @vaddr: [in]kernel space pointer obtained from dma_buf_kmap_atomic.
- *
- * This call must always succeed.
- */
-void dma_buf_kunmap_atomic(struct dma_buf *dmabuf, unsigned long page_num,
-  void *vaddr)
-{
-   WARN_ON(!dmabuf);
-
-   if (dmabuf->ops->unmap_atomic)
-   dmabuf->ops->unmap_atomic(dmabuf, page_num, vaddr);
-}
-EXPORT_SYMBOL_GPL(dma_buf_kunmap_atomic);
-
 /**
 * dma_buf_kmap - Map a page of the buffer object into kernel address space. The
  * same restrictions as for kmap and friends apply.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index f1500f1ec0f5..a156b3891a3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -244,9 +244,7 @@ static const struct dma_buf_ops amdgpu_dmabuf_ops = {
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_gem_begin_cpu_access,
.map = drm_gem_dmabuf_kmap,
-   .map_atomic = drm_gem_dmabuf_kmap_atomic,
.unmap = drm_gem_dmabuf_kunmap,
-   .unmap_atomic = drm_gem_dmabuf_kunmap_atomic,
.mmap = drm_gem_dmabuf_mmap,
.vmap = drm_gem_dmabuf_vmap,
.vunmap = drm_gem_dmabuf_vunmap,
diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
index 

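With the atomic variants gone, importers that need CPU access are left with the sleeping kmap interface, bracketed by begin/end_cpu_access. A minimal sketch (the function and its caller are hypothetical; the dma_buf calls are the interface that survives this patch):

#include <linux/dma-buf.h>

/* Hypothetical importer-side CPU read of page 0 of a dma-buf, using the
 * non-atomic interface.  May sleep, so no spinlocks held by the caller. */
static int read_first_page(struct dma_buf *dmabuf)
{
	void *vaddr;
	int ret;

	ret = dma_buf_begin_cpu_access(dmabuf, DMA_FROM_DEVICE);
	if (ret)
		return ret;

	vaddr = dma_buf_kmap(dmabuf, 0);	/* page index in PAGE_SIZE units */
	if (vaddr) {
		/* ... read the page contents through vaddr ... */
		dma_buf_kunmap(dmabuf, 0, vaddr);
	}

	return dma_buf_end_cpu_access(dmabuf, DMA_FROM_DEVICE);
}
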
[PATCH 1/5] dma_buf: remove device parameter from attach callback

2018-06-01 Thread Christian König
The device parameter is completely unused because it is available in the
attachment structure as well.

Signed-off-by: Christian König 
---
 drivers/dma-buf/dma-buf.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 3 +--
 drivers/gpu/drm/drm_prime.c   | 3 +--
 drivers/gpu/drm/udl/udl_dmabuf.c  | 1 -
 drivers/gpu/drm/vmwgfx/vmwgfx_prime.c | 1 -
 drivers/media/common/videobuf2/videobuf2-dma-contig.c | 2 +-
 drivers/media/common/videobuf2/videobuf2-dma-sg.c | 2 +-
 drivers/media/common/videobuf2/videobuf2-vmalloc.c| 2 +-
 include/drm/drm_prime.h   | 2 +-
 include/linux/dma-buf.h   | 3 +--
 10 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index d78d5fc173dc..e99a8d19991b 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -568,7 +568,7 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
	mutex_lock(&dmabuf->lock);
 
if (dmabuf->ops->attach) {
-   ret = dmabuf->ops->attach(dmabuf, dev, attach);
+   ret = dmabuf->ops->attach(dmabuf, attach);
if (ret)
goto err_attach;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 4683626b065f..f1500f1ec0f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -133,7 +133,6 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 }
 
 static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
-struct device *target_dev,
 struct dma_buf_attachment *attach)
 {
struct drm_gem_object *obj = dma_buf->priv;
@@ -141,7 +140,7 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
long r;
 
-   r = drm_gem_map_attach(dma_buf, target_dev, attach);
+   r = drm_gem_map_attach(dma_buf, attach);
if (r)
return r;
 
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 7856a9b3f8a8..4a3a232fea67 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -186,7 +186,6 @@ static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpriv,
 /**
  * drm_gem_map_attach - dma_buf attach implementation for GEM
  * @dma_buf: buffer to attach device to
- * @target_dev: not used
  * @attach: buffer attachment data
  *
 * Allocates &drm_prime_attachment and calls &drm_driver.gem_prime_pin for
@@ -195,7 +194,7 @@ static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpriv,
  *
  * Returns 0 on success, negative error code on failure.
  */
-int drm_gem_map_attach(struct dma_buf *dma_buf, struct device *target_dev,
+int drm_gem_map_attach(struct dma_buf *dma_buf,
   struct dma_buf_attachment *attach)
 {
struct drm_prime_attachment *prime_attach;
diff --git a/drivers/gpu/drm/udl/udl_dmabuf.c b/drivers/gpu/drm/udl/udl_dmabuf.c
index 2867ed155ff6..5fdc8bdc2026 100644
--- a/drivers/gpu/drm/udl/udl_dmabuf.c
+++ b/drivers/gpu/drm/udl/udl_dmabuf.c
@@ -29,7 +29,6 @@ struct udl_drm_dmabuf_attachment {
 };
 
 static int udl_attach_dma_buf(struct dma_buf *dmabuf,
- struct device *dev,
  struct dma_buf_attachment *attach)
 {
struct udl_drm_dmabuf_attachment *udl_attach;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
index 0d42a46521fc..fbffb37ccf42 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
@@ -40,7 +40,6 @@
  */
 
 static int vmw_prime_map_attach(struct dma_buf *dma_buf,
-   struct device *target_dev,
struct dma_buf_attachment *attach)
 {
return -ENOSYS;
diff --git a/drivers/media/common/videobuf2/videobuf2-dma-contig.c b/drivers/media/common/videobuf2/videobuf2-dma-contig.c
index f1178f6f434d..12d0072c52c2 100644
--- a/drivers/media/common/videobuf2/videobuf2-dma-contig.c
+++ b/drivers/media/common/videobuf2/videobuf2-dma-contig.c
@@ -222,7 +222,7 @@ struct vb2_dc_attachment {
enum dma_data_direction dma_dir;
 };
 
-static int vb2_dc_dmabuf_ops_attach(struct dma_buf *dbuf, struct device *dev,
+static int vb2_dc_dmabuf_ops_attach(struct dma_buf *dbuf,
struct dma_buf_attachment *dbuf_attach)
 {
struct vb2_dc_attachment *attach;
diff --git a/drivers/media/common/videobuf2/videobuf2-dma-sg.c b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
index 753ed3138dcc..cf94765e593f 100644
--- a/drivers/media/common/videobuf2/videobuf2-dma-sg.c
+++ b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
@@ -371,7 +371,7 @@ struct 

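For illustration, an exporter-side sketch of the new callback signature (the exporter and its ops table are hypothetical; the point is that the device is still reachable as attach->dev):

#include <linux/device.h>
#include <linux/dma-buf.h>

/* Hypothetical exporter: the attach callback no longer receives the
 * target device as a parameter, it reads it from the attachment. */
static int my_exporter_attach(struct dma_buf *dmabuf,
			      struct dma_buf_attachment *attach)
{
	struct device *dev = attach->dev;	/* was the target_dev parameter */

	dev_dbg(dev, "attaching to exporter buffer %p\n", dmabuf->priv);
	return 0;
}

static const struct dma_buf_ops my_exporter_ops = {
	.attach = my_exporter_attach,
	/* .map_dma_buf, .unmap_dma_buf, .release, .map, .mmap, ... */
};
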
[PATCH 3/3] drm/amdgpu: Hook up amdgpu_object.c documentation

2018-06-01 Thread Michel Dänzer
From: Michel Dänzer 

Signed-off-by: Michel Dänzer 
---
 Documentation/gpu/amdgpu.rst | 9 +
 1 file changed, 9 insertions(+)

diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
index ad3711fd2a28..1fbf3876a3d8 100644
--- a/Documentation/gpu/amdgpu.rst
+++ b/Documentation/gpu/amdgpu.rst
@@ -18,6 +18,15 @@ Memory Domains
 .. kernel-doc:: include/uapi/drm/amdgpu_drm.h
:doc: memory domains
 
+Buffer Objects
+--------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+   :doc: amdgpu_object
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+   :internal:
+
 PRIME Buffer Sharing
 --------------------
 
-- 
2.17.0



[PATCH 2/3] drm/amdgpu: Fix-ups for amdgpu_object.c documentation

2018-06-01 Thread Michel Dänzer
From: Michel Dänzer 

* Fix format of return value descriptions
* Document all parameters of amdgpu_bo_free_kernel
* Document amdgpu_bo_get_preferred_pin_domain

Signed-off-by: Michel Dänzer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 78 +++---
 1 file changed, 55 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 78d75ae5932f..987a9fa33d35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -93,7 +93,8 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
  * Uses destroy function associated with the object to determine if this is
 * an &amdgpu_bo.
  *
- * Returns true if the object belongs to &amdgpu_bo, false if not.
+ * Returns:
+ * true if the object belongs to &amdgpu_bo, false if not.
  */
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
 {
@@ -214,7 +215,8 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
  *
  * Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
  *
- * Returns 0 on success, negative error code otherwise.
+ * Returns:
+ * 0 on success, negative error code otherwise.
  */
 int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
  unsigned long size, int align,
@@ -291,7 +293,8 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
  *
  * Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
  *
- * Returns 0 on success, negative error code otherwise.
+ * Returns:
+ * 0 on success, negative error code otherwise.
  */
 int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
unsigned long size, int align,
@@ -315,6 +318,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
  * amdgpu_bo_free_kernel - free BO for kernel use
  *
  * @bo: amdgpu BO to free
+ * @gpu_addr: pointer to where the BO's GPU memory space address was stored
+ * @cpu_addr: pointer to where the BO's CPU memory space address was stored
  *
 * unmaps and unpins a BO for kernel internal use.
  */
@@ -539,7 +544,8 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
  * Shadow object is used to backup the original buffer object, and is always
  * in GTT.
  *
- * Returns 0 for success or a negative error code on failure.
+ * Returns:
+ * 0 for success or a negative error code on failure.
  */
 int amdgpu_bo_create(struct amdgpu_device *adev,
 struct amdgpu_bo_param *bp,
@@ -582,7 +588,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 * Copies an &amdgpu_bo buffer object to its shadow object.
  * Not used for now.
  *
- * Returns 0 for success or a negative error code on failure.
+ * Returns:
+ * 0 for success or a negative error code on failure.
  */
 int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
   struct amdgpu_ring *ring,
@@ -625,7 +632,8 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
  * This is used for validating shadow bos.  It calls ttm_bo_validate() to
  * make sure the buffer is resident where it needs to be.
  *
- * Returns 0 for success or a negative error code on failure.
+ * Returns:
+ * 0 for success or a negative error code on failure.
  */
 int amdgpu_bo_validate(struct amdgpu_bo *bo)
 {
@@ -662,7 +670,8 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
  * This is used for recovering a buffer from its shadow in case of a gpu
  * reset where vram context may be lost.
  *
- * Returns 0 for success or a negative error code on failure.
+ * Returns:
+ * 0 for success or a negative error code on failure.
  */
 int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
  struct amdgpu_ring *ring,
@@ -704,7 +713,8 @@ int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
  * Calls ttm_bo_kmap() to set up the kernel virtual mapping; calls
  * amdgpu_bo_kptr() to get the kernel virtual address.
  *
- * Returns 0 for success or a negative error code on failure.
+ * Returns:
+ * 0 for success or a negative error code on failure.
  */
 int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
 {
@@ -742,7 +752,8 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
  *
  * Calls ttm_kmap_obj_virtual() to get the kernel virtual address
  *
- * Returns the virtual address of a buffer object area.
+ * Returns:
+ * the virtual address of a buffer object area.
  */
 void *amdgpu_bo_kptr(struct amdgpu_bo *bo)
 {
@@ -769,7 +780,8 @@ void amdgpu_bo_kunmap(struct amdgpu_bo *bo)
  *
 * References the contained &ttm_buffer_object.
  *
- * Returns a refcounted pointer to the &amdgpu_bo buffer object.
+ * Returns:
+ * a refcounted pointer to the &amdgpu_bo buffer object.
  */
 struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
 {
@@ -819,7 +831,8 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo)
  * where to pin a buffer if there are specific restrictions on where a buffer
  * must be located.
  *
- * Returns 0 for 

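For illustration, a short sketch using the helpers documented above. The calling function is hypothetical; the signatures are intended to match amdgpu_object.c at the time of this series:

#include "amdgpu.h"

/* Hypothetical caller: allocate a page-sized, kernel-owned BO in VRAM,
 * touch it through the CPU mapping, then free it again. */
static int example_kernel_bo(struct amdgpu_device *adev)
{
	struct amdgpu_bo *bo = NULL;
	u64 gpu_addr;
	void *cpu_addr;
	int r;

	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM,
				    &bo, &gpu_addr, &cpu_addr);
	if (r)
		return r;

	memset(cpu_addr, 0, PAGE_SIZE);

	/* unmaps, unpins and drops the reference, clearing all three cookies */
	amdgpu_bo_free_kernel(&bo, &gpu_addr, &cpu_addr);
	return 0;
}
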
[PATCH 1/3] drm/amdgpu: Hook up documentation about memory domains

2018-06-01 Thread Michel Dänzer
From: Michel Dänzer 

Signed-off-by: Michel Dänzer 
---
 Documentation/gpu/amdgpu.rst | 8 
 1 file changed, 8 insertions(+)

diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
index f557866f6788..ad3711fd2a28 100644
--- a/Documentation/gpu/amdgpu.rst
+++ b/Documentation/gpu/amdgpu.rst
@@ -10,6 +10,14 @@ Core Driver Infrastructure
 
 This section covers core driver infrastructure.
 
+.. _amdgpu_memory_domains:
+
+Memory Domains
+--------------
+
+.. kernel-doc:: include/uapi/drm/amdgpu_drm.h
+   :doc: memory domains
+
 PRIME Buffer Sharing
 --------------------
 
-- 
2.17.0


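For reference, the :doc: option of the kernel-doc directive pulls a DOC: comment out of the named source file. A sketch of the shape such a comment takes (illustrative only; the real overview text lives in include/uapi/drm/amdgpu_drm.h):

/**
 * DOC: memory domains
 *
 * Overview prose for the topic goes here.  The .rst directive
 *
 *   .. kernel-doc:: include/uapi/drm/amdgpu_drm.h
 *      :doc: memory domains
 *
 * extracts exactly this block into the generated documentation.
 */
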

Re: [PATCH] drm/amd/display: avoid sleeping in atomic context while creating new context or state

2018-06-01 Thread Christian König

Ok, that works for me as well.

Please always check if it is really necessary before adding any GFP_ATOMIC 
allocation, because that is rather invasive and should be avoided.


Christian.

On 01.06.2018 at 11:56, S, Shirish wrote:


The V2 of this patch is already reviewed by Harry.
The change I made in dc_create() is no longer applicable.

Regards,
Shirish S
On 5/31/2018 11:35 PM, Christian König wrote:

On 30.05.2018 at 18:03, Harry Wentland wrote:

On 2018-05-30 06:17 AM, Shirish S wrote:

This patch fixes the warning messages that are caused by calling a
sleeping function in atomic context, as below:

BUG: sleeping function called from invalid context at mm/slab.h:419
in_atomic(): 1, irqs_disabled(): 1, pid: 5, name: kworker/u4:0
CPU: 1 PID: 5 Comm: kworker/u4:0 Tainted: G    W 4.14.35 #941
Workqueue: events_unbound commit_work
Call Trace:
  dump_stack+0x4d/0x63
  ___might_sleep+0x11f/0x12e
  kmem_cache_alloc_trace+0x41/0xea
  dc_create_state+0x1f/0x30
  dc_commit_updates_for_stream+0x73/0x4cf
  ? amdgpu_get_crtc_scanoutpos+0x82/0x16b
  amdgpu_dm_do_flip+0x239/0x298
  amdgpu_dm_commit_planes.isra.23+0x379/0x54b
  ? dc_commit_state+0x3da/0x404
  amdgpu_dm_atomic_commit_tail+0x4fc/0x5d2
  ? wait_for_common+0x5b/0x69
  commit_tail+0x42/0x64
  process_one_work+0x1b0/0x314
  worker_thread+0x1cb/0x2c1
  ? create_worker+0x1da/0x1da
  kthread+0x156/0x15e
  ? kthread_flush_work+0xea/0xea
  ret_from_fork+0x22/0x40

Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/display/dc/core/dc.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 33149ed..d62206f 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -588,7 +588,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)

    struct dc *dc_create(const struct dc_init_data *init_params)
   {
-    struct dc *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
+    struct dc *dc = kzalloc(sizeof(*dc), GFP_ATOMIC);

Are you sure this one can be called in atomic context?

If so then everything in construct() would also need GFP_ATOMIC.


Well the backtrace is quite obvious, but I agree that change still 
looks fishy to me as well.


Using GFP_ATOMIC should only be a last resort when nothing else 
helps, but here it looks more like we misuse a spinlock where a mutex 
or semaphore would be more appropriate.


Where exactly does the context become atomic in the call trace?

Christian.



Harry


  unsigned int full_pipe_count;
    if (NULL == dc)
@@ -937,7 +937,7 @@ bool dc_post_update_surfaces_to_stream(struct dc *dc)

  struct dc_state *dc_create_state(void)
  {
  struct dc_state *context = kzalloc(sizeof(struct dc_state),
-   GFP_KERNEL);
+   GFP_ATOMIC);
    if (!context)
  return NULL;



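A minimal sketch of the alternative Christian describes: preallocate with GFP_KERNEL before entering the atomic section, instead of switching the allocation itself to GFP_ATOMIC. All names below are illustrative, not the fix that was eventually merged; struct dc_state is the type under discussion in the quoted patch:

#include <linux/slab.h>
#include <linux/spinlock.h>
#include "dc.h"		/* for struct dc_state (assumed) */

/* Hypothetical commit path: allocate while we may still sleep, then do
 * the actual work under the lock without any allocation. */
static int commit_with_prealloc(spinlock_t *lock)
{
	struct dc_state *state;
	unsigned long flags;

	state = kzalloc(sizeof(*state), GFP_KERNEL);	/* may sleep, fine here */
	if (!state)
		return -ENOMEM;

	spin_lock_irqsave(lock, flags);		/* atomic context begins here */
	/* ... hand the preallocated state to the commit code; no
	 * allocations inside the lock ... */
	spin_unlock_irqrestore(lock, flags);

	kfree(state);
	return 0;
}
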

Re: [PATCH] drm/amdgpu: fix ib test hang with gfxoff enabled

2018-06-01 Thread Christian König

On 01.06.2018 at 11:29, Huang Rui wrote:

On Fri, Jun 01, 2018 at 05:13:49PM +0800, Christian König wrote:

On 01.06.2018 at 08:41, Huang Rui wrote:

After deferring the execution of the gfx/compute IB tests, the gfx block
has already gone into the "mid state" of gfxoff by the time they run.

PWR_MISC_CNTL_STATUS: PWR_GFXOFF_STATUS field (2:1 bits)
0 = GFXOFF.
1 = Transition out of GFXOFF state.
2 = Not in GFXOFF.
3 = Transition into GFXOFF.

If we hit a mid state (1 or 3), the doorbell write interrupt cannot wake the
gfx block back up successfully. The field value is 1 when we issue the IB test
at that point, so we get the hang. This is the root cause of the issue we encountered.

Meanwhile, we cannot set GFX clockgating once gfx is already in the "off" state.
So move the gfx powergating and gfxoff enabling to the end of initialization,
after the IB tests and clockgating setup.

Mhm, that still looks like only a half-baked solution:

1. What prevents this bug from happening during "normal" IB submission
from userspace?

2. Shouldn't we poll the PWR_MISC_CNTL_STATUS register to make sure we
are not in any transition phase instead?


Yes, right. How about also adding polling of PWR_MISC_CNTL_STATUS in
amdgpu_ring_commit(), after set_wptr, to confirm the status is "0" or "2"?


You could add an end_use() callback for that, but I think we need to do
this in gfx_v9_0_ring_set_wptr_gfx() before we write the doorbell instead.


Christian.



Thanks,
Ray




Re: [PATCH] drm/amd/display: avoid sleeping in atomic context while creating new context or state

2018-06-01 Thread S, Shirish


The V2 of this patch is already reviewed by Harry.
The change I made in dc_create() is no longer applicable.

Regards,
Shirish S
On 5/31/2018 11:35 PM, Christian König wrote:

On 30.05.2018 at 18:03, Harry Wentland wrote:

On 2018-05-30 06:17 AM, Shirish S wrote:

This patch fixes the warning messages that are caused by calling a
sleeping function in atomic context, as below:

BUG: sleeping function called from invalid context at mm/slab.h:419
in_atomic(): 1, irqs_disabled(): 1, pid: 5, name: kworker/u4:0
CPU: 1 PID: 5 Comm: kworker/u4:0 Tainted: G    W 4.14.35 #941
Workqueue: events_unbound commit_work
Call Trace:
  dump_stack+0x4d/0x63
  ___might_sleep+0x11f/0x12e
  kmem_cache_alloc_trace+0x41/0xea
  dc_create_state+0x1f/0x30
  dc_commit_updates_for_stream+0x73/0x4cf
  ? amdgpu_get_crtc_scanoutpos+0x82/0x16b
  amdgpu_dm_do_flip+0x239/0x298
  amdgpu_dm_commit_planes.isra.23+0x379/0x54b
  ? dc_commit_state+0x3da/0x404
  amdgpu_dm_atomic_commit_tail+0x4fc/0x5d2
  ? wait_for_common+0x5b/0x69
  commit_tail+0x42/0x64
  process_one_work+0x1b0/0x314
  worker_thread+0x1cb/0x2c1
  ? create_worker+0x1da/0x1da
  kthread+0x156/0x15e
  ? kthread_flush_work+0xea/0xea
  ret_from_fork+0x22/0x40

Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/display/dc/core/dc.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 33149ed..d62206f 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -588,7 +588,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)

    struct dc *dc_create(const struct dc_init_data *init_params)
   {
-    struct dc *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
+    struct dc *dc = kzalloc(sizeof(*dc), GFP_ATOMIC);

Are you sure this one can be called in atomic context?

If so then everything in construct() would also need GFP_ATOMIC.


Well the backtrace is quite obvious, but I agree that change still 
looks fishy to me as well.


Using GFP_ATOMIC should only be a last resort when nothing else helps, 
but here it looks more like we misuse a spinlock where a mutex or 
semaphore would be more appropriate.


Where exactly does the context become atomic in the call trace?

Christian.



Harry


  unsigned int full_pipe_count;
    if (NULL == dc)
@@ -937,7 +937,7 @@ bool dc_post_update_surfaces_to_stream(struct dc *dc)

  struct dc_state *dc_create_state(void)
  {
  struct dc_state *context = kzalloc(sizeof(struct dc_state),
-   GFP_KERNEL);
+   GFP_ATOMIC);
    if (!context)
  return NULL;






--
Regards,
Shirish S



Re: [PATCH] drm/amdgpu: fix ib test hang with gfxoff enabled

2018-06-01 Thread Huang Rui
On Fri, Jun 01, 2018 at 05:13:49PM +0800, Christian König wrote:
> Am 01.06.2018 um 08:41 schrieb Huang Rui:
> > After deferring the execution of the gfx/compute IB tests, the gfx block
> > has already gone into the "mid state" of gfxoff by the time they run.
> >
> > PWR_MISC_CNTL_STATUS: PWR_GFXOFF_STATUS field (2:1 bits)
> > 0 = GFXOFF.
> > 1 = Transition out of GFXOFF state.
> > 2 = Not in GFXOFF.
> > 3 = Transition into GFXOFF.
> >
> > If we hit a mid state (1 or 3), the doorbell write interrupt cannot wake the
> > gfx block back up successfully. The field value is 1 when we issue the IB test
> > at that point, so we get the hang. This is the root cause of the issue we
> > encountered.
> >
> > Meanwhile, we cannot set GFX clockgating once gfx is already in the "off" state.
> > So move the gfx powergating and gfxoff enabling to the end of initialization,
> > after the IB tests and clockgating setup.
> 
> Mhm, that still looks like only a half-baked solution:
> 
> 1. What prevents this bug from happening during "normal" IB submission 
> from userspace?
> 
> 2. Shouldn't we poll the PWR_MISC_CNTL_STATUS register to make sure we 
> are not in any transition phase instead?
> 

Yes, right. How about also adding polling of PWR_MISC_CNTL_STATUS in
amdgpu_ring_commit(), after set_wptr, to confirm the status is "0" or "2"?

Thanks,
Ray
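
For illustration, a sketch of such a poll. The field encoding comes from the commit message above; the register accessor and offset macros are assumptions, not verified against the headers:

/* Spin until PWR_GFXOFF_STATUS (bits 2:1 of PWR_MISC_CNTL_STATUS) reads
 * 0 (GFXOFF) or 2 (not in GFXOFF), i.e. no transition is in flight,
 * before touching the doorbell.  RREG32_SOC15 and the mm offset are
 * assumed names here. */
static void gfx_wait_gfxoff_settled(struct amdgpu_device *adev)
{
	u32 status;

	do {
		status = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
		status = (status >> 1) & 0x3;	/* PWR_GFXOFF_STATUS field */
	} while (status == 1 || status == 3);	/* 1/3 = transitioning */
}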


Re: [PATCH] drm/amdgpu: fix ib test hang with gfxoff enabled

2018-06-01 Thread Christian König

On 01.06.2018 at 08:41, Huang Rui wrote:

After deferring the execution of the gfx/compute IB tests, the gfx block
has already gone into the "mid state" of gfxoff by the time they run.

PWR_MISC_CNTL_STATUS: PWR_GFXOFF_STATUS field (2:1 bits)
0 = GFXOFF.
1 = Transition out of GFXOFF state.
2 = Not in GFXOFF.
3 = Transition into GFXOFF.

If we hit a mid state (1 or 3), the doorbell write interrupt cannot wake the
gfx block back up successfully. The field value is 1 when we issue the IB test
at that point, so we get the hang. This is the root cause of the issue we encountered.

Meanwhile, we cannot set GFX clockgating once gfx is already in the "off" state.
So move the gfx powergating and gfxoff enabling to the end of initialization,
after the IB tests and clockgating setup.


Mhm, that still looks like only a half-baked solution:

1. What prevents this bug from happening during "normal" IB submission 
from userspace?


2. Shouldn't we poll the PWR_MISC_CNTL_STATUS register to make sure we 
are not in any transition phase instead?


Regards,
Christian.



Signed-off-by: Huang Rui 
Cc: Hawking Zhang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 10 ++
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  5 -
  drivers/gpu/drm/amd/powerplay/amd_powerplay.c |  2 +-
  drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c |  4 ++--
  4 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f509d32..e1c8806 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1723,6 +1723,16 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
}
}
}
+
+   if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) {
+   amdgpu_device_ip_set_powergating_state(adev,
+  AMD_IP_BLOCK_TYPE_GFX,
+  AMD_CG_STATE_GATE);
+   amdgpu_device_ip_set_powergating_state(adev,
+  AMD_IP_BLOCK_TYPE_SMC,
+  AMD_CG_STATE_GATE);
+   }
+
return 0;
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

index 2c5e2a4..31ecc86 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3358,11 +3358,6 @@ static int gfx_v9_0_late_init(void *handle)
if (r)
return r;
  
-	r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,

-  AMD_PG_STATE_GATE);
-   if (r)
-   return r;
-
return 0;
  }
  
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c

index b493369..d0e6e2d 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -245,7 +245,7 @@ static int pp_set_powergating_state(void *handle,
}
  
  	if (hwmgr->hwmgr_func->enable_per_cu_power_gating == NULL) {

-   pr_info("%s was not implemented.\n", __func__);
+   pr_debug("%s was not implemented.\n", __func__);
return 0;
}
  
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c

index 7712eb6..b72d089 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
@@ -284,7 +284,7 @@ static int smu10_disable_gfx_off(struct pp_hwmgr *hwmgr)
  
  static int smu10_disable_dpm_tasks(struct pp_hwmgr *hwmgr)

  {
-   return smu10_disable_gfx_off(hwmgr);
+   return 0;
  }
  
  static int smu10_enable_gfx_off(struct pp_hwmgr *hwmgr)

@@ -299,7 +299,7 @@ static int smu10_enable_gfx_off(struct pp_hwmgr *hwmgr)
  
  static int smu10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)

  {
-   return smu10_enable_gfx_off(hwmgr);
+   return 0;
  }
  
  static int smu10_gfx_off_control(struct pp_hwmgr *hwmgr, bool enable)




Re: [PATCH] drm/amdgpu: add kernel DOC for ioctls in amdgpu_cs.c file

2018-06-01 Thread Michel Dänzer
On 2018-05-30 08:42 PM, Leo Liu wrote:
> There are four ioctls in this file, and the DOC sections give details of
> the data structures for each ioctl and their functionality.
> 
> Signed-off-by: Leo Liu 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 142 +
>  1 file changed, 142 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 12f0d18c6ee8..343ff115cff1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1217,6 +1217,49 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>   return 0;
>  }
>  
> +/**
> + * DOC:  amdgpu_cs_ioctl
> + *
> + * This ioctl processes user space command submission chunks,
> + * and returns a fence sequence number associated with an amdgpu fence.
> + *
> + * In data structure:
> + *
> + * __u32 ctx_id:
> + * Integer ID, created via the DRM_AMDGPU_CTX ioctl call when the user
> + * space command submission context is created. It will be used as the
> + * ID for later command submissions.
> + *
> + * __u32 bo_list_handle:
> + * Handle of the resource list associated with this CS, created via the
> + * DRM_AMDGPU_BO_LIST ioctl call before command submission; the BOs in
> + * the list will be validated.
> + *
> + * __u32 num_chunks:
> + * Number of chunks, their types include:
> + * AMDGPU_CHUNK_ID_IB
> + * The data will be filled into the IB buffer, and mapped to a HW ring.
> + *
> + * AMDGPU_CHUNK_ID_FENCE
> + * The data will be used to find user fence BO and its offset.
> + *
> + * AMDGPU_CHUNK_ID_DEPENDENCIES
> + * AMDGPU_CHUNK_ID_SYNCOBJ_IN
> + * AMDGPU_CHUNK_ID_SYNCOBJ_OUT
> + * These will be parsed as fence dependencies per the given requirement,
> + * and will be remembered to be synced later.
> + *
> + * __u32 _pad:
> + *
> + * __u64 chunks:
> + * Points to the CS chunks.

BTW, this kind of formatting isn't preserved in the generated
documentation, so it probably won't look as intended.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer


Re: [PATCH] drm/amdgpu: add kernel DOC for ioctls in amdgpu_cs.c file

2018-06-01 Thread Michel Dänzer
On 2018-05-31 08:02 PM, Leo Liu wrote:
> On 05/31/2018 01:04 PM, Michel Dänzer wrote:
>> On 2018-05-31 06:49 PM, Leo Liu wrote:
>>> On 05/31/2018 12:47 PM, Michel Dänzer wrote:
 On 2018-05-31 06:39 PM, Leo Liu wrote:
> On 05/31/2018 12:30 PM, Michel Dänzer wrote:
>> On 2018-05-30 08:42 PM, Leo Liu wrote:
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index 12f0d18c6ee8..343ff115cff1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -1217,6 +1217,49 @@ static int amdgpu_cs_submit(struct
>>> amdgpu_cs_parser *p,
>>>     return 0;
>>>     }
>>>     +/**
>>> + * DOC:  amdgpu_cs_ioctl
>> DOC comments shouldn't be used for functions, see
>> Documentation/output/doc-guide/kernel-doc.html#function-documentation
>>
> This doc is not for the functions, it's like something in commit
> message
> about the details of in/out data, and what is the functionality for
> this
> ioctl.
 Data structures should be documented in comments directly above their
 definitions, per
 Documentation/output/doc-guide/kernel-doc.html#structure-union-and-enumeration-documentation.
 What an ioctl does should be described in the function documentation
 comment above its implementation, as described above.
>>> Then what do you think the kernel doc for ioctl should do?
>> The data structure should be documented in a comment above its
>> definition, and what the ioctl does should be documented in a comment
>> above the function implementing it. The source files need to be hooked
>> up to the amdgpu.rst file.
>>
>> Does that answer your question?
> Sort of. It tells me what should not be in the kernel DOC. I will put
> those details about data and functionalities where they should be.
> And focus the kernel DOC section on what the ioctl is used for.

I'm afraid I don't see DOC comments being useful for documenting ioctls.
They're for general/overview kind of documentation. For ioctls, the data
structure and function documentation comments should be sufficient. You
can reference a data structure in a function documentation comment like
this:

/**
 * amdgpu_cs_ioctl - ...
 * @...: ...
 *
 * ...
 *
 * The userspace parameters for this ioctl are passed in via
 * &struct drm_amdgpu_cs.
 *
 * Returns:
 * ...
 */


See e.g. the documentation about i915_gem_get/set_tiling_ioctl in
drivers/gpu/drm/i915/i915_gem_tiling.c.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer


[PATCH] drm/amdgpu: fix ib test hang with gfxoff enabled

2018-06-01 Thread Huang Rui
After deferring the execution of the gfx/compute IB tests, the gfx block
has already gone into the "mid state" of gfxoff by the time they run.

PWR_MISC_CNTL_STATUS: PWR_GFXOFF_STATUS field (2:1 bits)
0 = GFXOFF.
1 = Transition out of GFXOFF state.
2 = Not in GFXOFF.
3 = Transition into GFXOFF.

If we hit a mid state (1 or 3), the doorbell write interrupt cannot wake the
gfx block back up successfully. The field value is 1 when we issue the IB test
at that point, so we get the hang. This is the root cause of the issue we encountered.

Meanwhile, we cannot set GFX clockgating once gfx is already in the "off" state.
So move the gfx powergating and gfxoff enabling to the end of initialization,
after the IB tests and clockgating setup.

Signed-off-by: Huang Rui 
Cc: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 10 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  5 -
 drivers/gpu/drm/amd/powerplay/amd_powerplay.c |  2 +-
 drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c |  4 ++--
 4 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f509d32..e1c8806 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1723,6 +1723,16 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
}
}
}
+
+   if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) {
+   amdgpu_device_ip_set_powergating_state(adev,
+  AMD_IP_BLOCK_TYPE_GFX,
+  AMD_CG_STATE_GATE);
+   amdgpu_device_ip_set_powergating_state(adev,
+  AMD_IP_BLOCK_TYPE_SMC,
+  AMD_CG_STATE_GATE);
+   }
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 2c5e2a4..31ecc86 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3358,11 +3358,6 @@ static int gfx_v9_0_late_init(void *handle)
if (r)
return r;
 
-   r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
-  AMD_PG_STATE_GATE);
-   if (r)
-   return r;
-
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index b493369..d0e6e2d 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -245,7 +245,7 @@ static int pp_set_powergating_state(void *handle,
}
 
if (hwmgr->hwmgr_func->enable_per_cu_power_gating == NULL) {
-   pr_info("%s was not implemented.\n", __func__);
+   pr_debug("%s was not implemented.\n", __func__);
return 0;
}
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
index 7712eb6..b72d089 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
@@ -284,7 +284,7 @@ static int smu10_disable_gfx_off(struct pp_hwmgr *hwmgr)
 
 static int smu10_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
 {
-   return smu10_disable_gfx_off(hwmgr);
+   return 0;
 }
 
 static int smu10_enable_gfx_off(struct pp_hwmgr *hwmgr)
@@ -299,7 +299,7 @@ static int smu10_enable_gfx_off(struct pp_hwmgr *hwmgr)
 
 static int smu10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 {
-   return smu10_enable_gfx_off(hwmgr);
+   return 0;
 }
 
 static int smu10_gfx_off_control(struct pp_hwmgr *hwmgr, bool enable)
-- 
2.7.4
