Re: [PATCH v2] drm/amdgpu: add support for user trap handlers

2020-09-23 Thread Christian König

Am 23.09.20 um 14:52 schrieb Samuel Pitoiset:


On 8/28/20 10:23 AM, Christian König wrote:

Am 28.08.20 um 10:14 schrieb Samuel Pitoiset:


On 8/28/20 9:57 AM, Christian König wrote:

Am 25.08.20 um 16:07 schrieb Samuel Pitoiset:

A trap handler can be used by userspace to catch shader exceptions
like divide by zero, memory violations etc.

On GFX6-GFX8, the registers used to configure TBA/TMA aren't
privileged and can be configured from userpace.

On GFX9+ they are per VMID and privileged, only the KMD can
configure them. At the moment, we don't know how to set them
via the CP, so they are only emitted if a VMID is reserved.

This introduces a new CS chunk that can be used to set the
TBA/TMA virtual address at submit time.

TODO:
- rebase on top of amd-staging-drm-next (this branch currently
hangs my GPU at boot)


Please split that up into multiple patches. The first one adding 
the general infrastructure and the following one the implementation 
for gfx9 and gfx10.

Sounds good.


And maybe even support this for gfx6-8 even if it is not necessary? 
Looks trivial to implement and would give userspace a more uniform 
handling for this.
v1 added gfx6-8 support but I removed it in v2 as requested by Alex 
because it's useless and might be better for preemption.


A few more comments below.



Signed-off-by: Samuel Pitoiset 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 31 


  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |  3 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h  |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 20 +--
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 20 +++
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 19 +++
  include/uapi/drm/amdgpu_drm.h    |  8 ++
  9 files changed, 110 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index a512ccbc4dea..6ca5c4912e3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -104,6 +104,19 @@ static int amdgpu_cs_bo_handles_chunk(struct 
amdgpu_cs_parser *p,

  return r;
  }
  +static int amdgpu_cs_user_trap_chunk(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_trap *data,
+ uint64_t *tba_addr, uint64_t *tma_addr)
+{
+    if (!data->tba_addr || !data->tma_addr)
+    return -EINVAL;
+
+    *tba_addr = data->tba_addr;
+    *tma_addr = data->tma_addr;
+
+    return 0;
+}
+
  static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, 
union drm_amdgpu_cs *cs)

  {
  struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
@@ -112,6 +125,7 @@ static int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, union drm_amdgpu_cs

  uint64_t *chunk_array;
  unsigned size, num_ibs = 0;
  uint32_t uf_offset = 0;
+    uint64_t tba_addr = 0, tma_addr = 0;
  int i;
  int ret;
  @@ -214,6 +228,19 @@ static int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, union drm_amdgpu_cs

    break;
  +    case AMDGPU_CHUNK_ID_TRAP:
+    size = sizeof(struct drm_amdgpu_cs_chunk_trap);
+    if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+    ret = -EINVAL;
+    goto free_partial_kdata;
+    }
+
+    ret = amdgpu_cs_user_trap_chunk(p, p->chunks[i].kdata,
+    _addr, _addr);
+    if (ret)
+    goto free_partial_kdata;
+    break;
+
  case AMDGPU_CHUNK_ID_DEPENDENCIES:
  case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
  case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -239,6 +266,10 @@ static int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, union drm_amdgpu_cs

    if (p->uf_entry.tv.bo)
  p->job->uf_addr = uf_offset;
+
+    p->job->tba_addr = tba_addr;
+    p->job->tma_addr = tma_addr;
+
  kfree(chunk_array);
    /* Use this opportunity to fill in task info for the vm */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 26127c7d2f32..1e703119e4c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -88,9 +88,10 @@
   * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for 
correctness

   * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
   * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
+ * - 3.40.0 - Add AMDGPU_CHUNK_ID_TRAP
   */
  #define KMS_DRIVER_MAJOR    3
-#define KMS_DRIVER_MINOR    39
+#define KMS_DRIVER_MINOR    40
  #define KMS_DRIVER_PATCHLEVEL    0
    int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h

index 8e58325bbca2..fd0d56724b4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -58,6 +58,10 @@ struct 

Re: [PATCH v2] drm/amdgpu: add support for user trap handlers

2020-09-23 Thread Samuel Pitoiset


On 8/28/20 10:23 AM, Christian König wrote:

Am 28.08.20 um 10:14 schrieb Samuel Pitoiset:


On 8/28/20 9:57 AM, Christian König wrote:

Am 25.08.20 um 16:07 schrieb Samuel Pitoiset:

A trap handler can be used by userspace to catch shader exceptions
like divide by zero, memory violations etc.

On GFX6-GFX8, the registers used to configure TBA/TMA aren't
privileged and can be configured from userpace.

On GFX9+ they are per VMID and privileged, only the KMD can
configure them. At the moment, we don't know how to set them
via the CP, so they are only emitted if a VMID is reserved.

This introduces a new CS chunk that can be used to set the
TBA/TMA virtual address at submit time.

TODO:
- rebase on top of amd-staging-drm-next (this branch currently
hangs my GPU at boot)


Please split that up into multiple patches. The first one adding the 
general infrastructure and the following one the implementation for 
gfx9 and gfx10.

Sounds good.


And maybe even support this for gfx6-8 even if it is not necessary? 
Looks trivial to implement and would give userspace a more uniform 
handling for this.
v1 added gfx6-8 support but I removed it in v2 as requested by Alex 
because it's useless and might be better for preemption.


A few more comments below.



Signed-off-by: Samuel Pitoiset 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 31 


  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |  3 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h  |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 20 +--
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 20 +++
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 19 +++
  include/uapi/drm/amdgpu_drm.h    |  8 ++
  9 files changed, 110 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index a512ccbc4dea..6ca5c4912e3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -104,6 +104,19 @@ static int amdgpu_cs_bo_handles_chunk(struct 
amdgpu_cs_parser *p,

  return r;
  }
  +static int amdgpu_cs_user_trap_chunk(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_trap *data,
+ uint64_t *tba_addr, uint64_t *tma_addr)
+{
+    if (!data->tba_addr || !data->tma_addr)
+    return -EINVAL;
+
+    *tba_addr = data->tba_addr;
+    *tma_addr = data->tma_addr;
+
+    return 0;
+}
+
  static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, 
union drm_amdgpu_cs *cs)

  {
  struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
@@ -112,6 +125,7 @@ static int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, union drm_amdgpu_cs

  uint64_t *chunk_array;
  unsigned size, num_ibs = 0;
  uint32_t uf_offset = 0;
+    uint64_t tba_addr = 0, tma_addr = 0;
  int i;
  int ret;
  @@ -214,6 +228,19 @@ static int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, union drm_amdgpu_cs

    break;
  +    case AMDGPU_CHUNK_ID_TRAP:
+    size = sizeof(struct drm_amdgpu_cs_chunk_trap);
+    if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+    ret = -EINVAL;
+    goto free_partial_kdata;
+    }
+
+    ret = amdgpu_cs_user_trap_chunk(p, p->chunks[i].kdata,
+    _addr, _addr);
+    if (ret)
+    goto free_partial_kdata;
+    break;
+
  case AMDGPU_CHUNK_ID_DEPENDENCIES:
  case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
  case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -239,6 +266,10 @@ static int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, union drm_amdgpu_cs

    if (p->uf_entry.tv.bo)
  p->job->uf_addr = uf_offset;
+
+    p->job->tba_addr = tba_addr;
+    p->job->tma_addr = tma_addr;
+
  kfree(chunk_array);
    /* Use this opportunity to fill in task info for the vm */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 26127c7d2f32..1e703119e4c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -88,9 +88,10 @@
   * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for 
correctness

   * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
   * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
+ * - 3.40.0 - Add AMDGPU_CHUNK_ID_TRAP
   */
  #define KMS_DRIVER_MAJOR    3
-#define KMS_DRIVER_MINOR    39
+#define KMS_DRIVER_MINOR    40
  #define KMS_DRIVER_PATCHLEVEL    0
    int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h

index 8e58325bbca2..fd0d56724b4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -58,6 +58,10 @@ struct amdgpu_vmid {
  uint32_t    oa_base;
   

Re: [PATCH v2] drm/amdgpu: add support for user trap handlers

2020-08-28 Thread Samuel Pitoiset


On 8/28/20 10:23 AM, Christian König wrote:

Am 28.08.20 um 10:14 schrieb Samuel Pitoiset:


On 8/28/20 9:57 AM, Christian König wrote:

Am 25.08.20 um 16:07 schrieb Samuel Pitoiset:

A trap handler can be used by userspace to catch shader exceptions
like divide by zero, memory violations etc.

On GFX6-GFX8, the registers used to configure TBA/TMA aren't
privileged and can be configured from userpace.

On GFX9+ they are per VMID and privileged, only the KMD can
configure them. At the moment, we don't know how to set them
via the CP, so they are only emitted if a VMID is reserved.

This introduces a new CS chunk that can be used to set the
TBA/TMA virtual address at submit time.

TODO:
- rebase on top of amd-staging-drm-next (this branch currently
hangs my GPU at boot)


Please split that up into multiple patches. The first one adding the 
general infrastructure and the following one the implementation for 
gfx9 and gfx10.

Sounds good.


And maybe even support this for gfx6-8 even if it is not necessary? 
Looks trivial to implement and would give userspace a more uniform 
handling for this.
v1 added gfx6-8 support but I removed it in v2 as requested by Alex 
because it's useless and might be better for preemption.


A few more comments below.



Signed-off-by: Samuel Pitoiset 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 31 


  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |  3 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h  |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 20 +--
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 20 +++
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 19 +++
  include/uapi/drm/amdgpu_drm.h    |  8 ++
  9 files changed, 110 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index a512ccbc4dea..6ca5c4912e3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -104,6 +104,19 @@ static int amdgpu_cs_bo_handles_chunk(struct 
amdgpu_cs_parser *p,

  return r;
  }
  +static int amdgpu_cs_user_trap_chunk(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_trap *data,
+ uint64_t *tba_addr, uint64_t *tma_addr)
+{
+    if (!data->tba_addr || !data->tma_addr)
+    return -EINVAL;
+
+    *tba_addr = data->tba_addr;
+    *tma_addr = data->tma_addr;
+
+    return 0;
+}
+
  static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, 
union drm_amdgpu_cs *cs)

  {
  struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
@@ -112,6 +125,7 @@ static int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, union drm_amdgpu_cs

  uint64_t *chunk_array;
  unsigned size, num_ibs = 0;
  uint32_t uf_offset = 0;
+    uint64_t tba_addr = 0, tma_addr = 0;
  int i;
  int ret;
  @@ -214,6 +228,19 @@ static int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, union drm_amdgpu_cs

    break;
  +    case AMDGPU_CHUNK_ID_TRAP:
+    size = sizeof(struct drm_amdgpu_cs_chunk_trap);
+    if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+    ret = -EINVAL;
+    goto free_partial_kdata;
+    }
+
+    ret = amdgpu_cs_user_trap_chunk(p, p->chunks[i].kdata,
+    _addr, _addr);
+    if (ret)
+    goto free_partial_kdata;
+    break;
+
  case AMDGPU_CHUNK_ID_DEPENDENCIES:
  case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
  case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -239,6 +266,10 @@ static int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, union drm_amdgpu_cs

    if (p->uf_entry.tv.bo)
  p->job->uf_addr = uf_offset;
+
+    p->job->tba_addr = tba_addr;
+    p->job->tma_addr = tma_addr;
+
  kfree(chunk_array);
    /* Use this opportunity to fill in task info for the vm */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 26127c7d2f32..1e703119e4c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -88,9 +88,10 @@
   * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for 
correctness

   * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
   * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
+ * - 3.40.0 - Add AMDGPU_CHUNK_ID_TRAP
   */
  #define KMS_DRIVER_MAJOR    3
-#define KMS_DRIVER_MINOR    39
+#define KMS_DRIVER_MINOR    40
  #define KMS_DRIVER_PATCHLEVEL    0
    int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h

index 8e58325bbca2..fd0d56724b4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -58,6 +58,10 @@ struct amdgpu_vmid {
  uint32_t    oa_base;
   

Re: [PATCH v2] drm/amdgpu: add support for user trap handlers

2020-08-28 Thread Christian König

Am 28.08.20 um 10:14 schrieb Samuel Pitoiset:


On 8/28/20 9:57 AM, Christian König wrote:

Am 25.08.20 um 16:07 schrieb Samuel Pitoiset:

A trap handler can be used by userspace to catch shader exceptions
like divide by zero, memory violations etc.

On GFX6-GFX8, the registers used to configure TBA/TMA aren't
privileged and can be configured from userpace.

On GFX9+ they are per VMID and privileged, only the KMD can
configure them. At the moment, we don't know how to set them
via the CP, so they are only emitted if a VMID is reserved.

This introduces a new CS chunk that can be used to set the
TBA/TMA virtual address at submit time.

TODO:
- rebase on top of amd-staging-drm-next (this branch currently
hangs my GPU at boot)


Please split that up into multiple patches. The first one adding the 
general infrastructure and the following one the implementation for 
gfx9 and gfx10.

Sounds good.


And maybe even support this for gfx6-8 even if it is not necessary? 
Looks trivial to implement and would give userspace a more uniform 
handling for this.
v1 added gfx6-8 support but I removed it in v2 as requested by Alex 
because it's useless and might be better for preemption.


A few more comments below.



Signed-off-by: Samuel Pitoiset 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 31 


  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |  3 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h  |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 20 +--
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 20 +++
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 19 +++
  include/uapi/drm/amdgpu_drm.h    |  8 ++
  9 files changed, 110 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index a512ccbc4dea..6ca5c4912e3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -104,6 +104,19 @@ static int amdgpu_cs_bo_handles_chunk(struct 
amdgpu_cs_parser *p,

  return r;
  }
  +static int amdgpu_cs_user_trap_chunk(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_trap *data,
+ uint64_t *tba_addr, uint64_t *tma_addr)
+{
+    if (!data->tba_addr || !data->tma_addr)
+    return -EINVAL;
+
+    *tba_addr = data->tba_addr;
+    *tma_addr = data->tma_addr;
+
+    return 0;
+}
+
  static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union 
drm_amdgpu_cs *cs)

  {
  struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
@@ -112,6 +125,7 @@ static int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, union drm_amdgpu_cs

  uint64_t *chunk_array;
  unsigned size, num_ibs = 0;
  uint32_t uf_offset = 0;
+    uint64_t tba_addr = 0, tma_addr = 0;
  int i;
  int ret;
  @@ -214,6 +228,19 @@ static int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, union drm_amdgpu_cs

    break;
  +    case AMDGPU_CHUNK_ID_TRAP:
+    size = sizeof(struct drm_amdgpu_cs_chunk_trap);
+    if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+    ret = -EINVAL;
+    goto free_partial_kdata;
+    }
+
+    ret = amdgpu_cs_user_trap_chunk(p, p->chunks[i].kdata,
+    _addr, _addr);
+    if (ret)
+    goto free_partial_kdata;
+    break;
+
  case AMDGPU_CHUNK_ID_DEPENDENCIES:
  case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
  case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -239,6 +266,10 @@ static int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, union drm_amdgpu_cs

    if (p->uf_entry.tv.bo)
  p->job->uf_addr = uf_offset;
+
+    p->job->tba_addr = tba_addr;
+    p->job->tma_addr = tma_addr;
+
  kfree(chunk_array);
    /* Use this opportunity to fill in task info for the vm */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 26127c7d2f32..1e703119e4c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -88,9 +88,10 @@
   * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for 
correctness

   * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
   * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
+ * - 3.40.0 - Add AMDGPU_CHUNK_ID_TRAP
   */
  #define KMS_DRIVER_MAJOR    3
-#define KMS_DRIVER_MINOR    39
+#define KMS_DRIVER_MINOR    40
  #define KMS_DRIVER_PATCHLEVEL    0
    int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h

index 8e58325bbca2..fd0d56724b4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -58,6 +58,10 @@ struct amdgpu_vmid {
  uint32_t    oa_base;
  uint32_t    oa_size;
  +    /* user 

Re: [PATCH v2] drm/amdgpu: add support for user trap handlers

2020-08-28 Thread Samuel Pitoiset


On 8/28/20 9:57 AM, Christian König wrote:

Am 25.08.20 um 16:07 schrieb Samuel Pitoiset:

A trap handler can be used by userspace to catch shader exceptions
like divide by zero, memory violations etc.

On GFX6-GFX8, the registers used to configure TBA/TMA aren't
privileged and can be configured from userpace.

On GFX9+ they are per VMID and privileged, only the KMD can
configure them. At the moment, we don't know how to set them
via the CP, so they are only emitted if a VMID is reserved.

This introduces a new CS chunk that can be used to set the
TBA/TMA virtual address at submit time.

TODO:
- rebase on top of amd-staging-drm-next (this branch currently
hangs my GPU at boot)


Please split that up into multiple patches. The first one adding the 
general infrastructure and the following one the implementation for 
gfx9 and gfx10.

Sounds good.


And maybe even support this for gfx6-8 even if it is not necessary? 
Looks trivial to implement and would give userspace a more uniform 
handling for this.
v1 added gfx6-8 support but I removed it in v2 as requested by Alex 
because it's useless and might be better for preemption.


A few more comments below.



Signed-off-by: Samuel Pitoiset 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 31 
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |  3 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h  |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 20 +--
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 20 +++
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 19 +++
  include/uapi/drm/amdgpu_drm.h    |  8 ++
  9 files changed, 110 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index a512ccbc4dea..6ca5c4912e3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -104,6 +104,19 @@ static int amdgpu_cs_bo_handles_chunk(struct 
amdgpu_cs_parser *p,

  return r;
  }
  +static int amdgpu_cs_user_trap_chunk(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_trap *data,
+ uint64_t *tba_addr, uint64_t *tma_addr)
+{
+    if (!data->tba_addr || !data->tma_addr)
+    return -EINVAL;
+
+    *tba_addr = data->tba_addr;
+    *tma_addr = data->tma_addr;
+
+    return 0;
+}
+
  static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union 
drm_amdgpu_cs *cs)

  {
  struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
@@ -112,6 +125,7 @@ static int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, union drm_amdgpu_cs

  uint64_t *chunk_array;
  unsigned size, num_ibs = 0;
  uint32_t uf_offset = 0;
+    uint64_t tba_addr = 0, tma_addr = 0;
  int i;
  int ret;
  @@ -214,6 +228,19 @@ static int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, union drm_amdgpu_cs

    break;
  +    case AMDGPU_CHUNK_ID_TRAP:
+    size = sizeof(struct drm_amdgpu_cs_chunk_trap);
+    if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+    ret = -EINVAL;
+    goto free_partial_kdata;
+    }
+
+    ret = amdgpu_cs_user_trap_chunk(p, p->chunks[i].kdata,
+    _addr, _addr);
+    if (ret)
+    goto free_partial_kdata;
+    break;
+
  case AMDGPU_CHUNK_ID_DEPENDENCIES:
  case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
  case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -239,6 +266,10 @@ static int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, union drm_amdgpu_cs

    if (p->uf_entry.tv.bo)
  p->job->uf_addr = uf_offset;
+
+    p->job->tba_addr = tba_addr;
+    p->job->tma_addr = tma_addr;
+
  kfree(chunk_array);
    /* Use this opportunity to fill in task info for the vm */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 26127c7d2f32..1e703119e4c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -88,9 +88,10 @@
   * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for 
correctness

   * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
   * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
+ * - 3.40.0 - Add AMDGPU_CHUNK_ID_TRAP
   */
  #define KMS_DRIVER_MAJOR    3
-#define KMS_DRIVER_MINOR    39
+#define KMS_DRIVER_MINOR    40
  #define KMS_DRIVER_PATCHLEVEL    0
    int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h

index 8e58325bbca2..fd0d56724b4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -58,6 +58,10 @@ struct amdgpu_vmid {
  uint32_t    oa_base;
  uint32_t    oa_size;
  +    /* user trap */
+    uint64_t    tba_addr;
+    

Re: [PATCH v2] drm/amdgpu: add support for user trap handlers

2020-08-28 Thread Christian König

Am 25.08.20 um 16:07 schrieb Samuel Pitoiset:

A trap handler can be used by userspace to catch shader exceptions
like divide by zero, memory violations etc.

On GFX6-GFX8, the registers used to configure TBA/TMA aren't
privileged and can be configured from userpace.

On GFX9+ they are per VMID and privileged, only the KMD can
configure them. At the moment, we don't know how to set them
via the CP, so they are only emitted if a VMID is reserved.

This introduces a new CS chunk that can be used to set the
TBA/TMA virtual address at submit time.

TODO:
- rebase on top of amd-staging-drm-next (this branch currently
hangs my GPU at boot)


Please split that up into multiple patches. The first one adding the 
general infrastructure and the following one the implementation for gfx9 
and gfx10.


And maybe even support this for gfx6-8 even if it is not necessary? 
Looks trivial to implement and would give userspace a more uniform 
handling for this.


A few more comments below.



Signed-off-by: Samuel Pitoiset 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 31 
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |  3 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h  |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 20 +--
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 20 +++
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 19 +++
  include/uapi/drm/amdgpu_drm.h|  8 ++
  9 files changed, 110 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index a512ccbc4dea..6ca5c4912e3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -104,6 +104,19 @@ static int amdgpu_cs_bo_handles_chunk(struct 
amdgpu_cs_parser *p,
return r;
  }
  
+static int amdgpu_cs_user_trap_chunk(struct amdgpu_cs_parser *p,

+struct drm_amdgpu_cs_chunk_trap *data,
+uint64_t *tba_addr, uint64_t *tma_addr)
+{
+   if (!data->tba_addr || !data->tma_addr)
+   return -EINVAL;
+
+   *tba_addr = data->tba_addr;
+   *tma_addr = data->tma_addr;
+
+   return 0;
+}
+
  static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union 
drm_amdgpu_cs *cs)
  {
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
@@ -112,6 +125,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser 
*p, union drm_amdgpu_cs
uint64_t *chunk_array;
unsigned size, num_ibs = 0;
uint32_t uf_offset = 0;
+   uint64_t tba_addr = 0, tma_addr = 0;
int i;
int ret;
  
@@ -214,6 +228,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
  
  			break;
  
+		case AMDGPU_CHUNK_ID_TRAP:

+   size = sizeof(struct drm_amdgpu_cs_chunk_trap);
+   if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+   ret = -EINVAL;
+   goto free_partial_kdata;
+   }
+
+   ret = amdgpu_cs_user_trap_chunk(p, p->chunks[i].kdata,
+   _addr, _addr);
+   if (ret)
+   goto free_partial_kdata;
+   break;
+
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -239,6 +266,10 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser 
*p, union drm_amdgpu_cs
  
  	if (p->uf_entry.tv.bo)

p->job->uf_addr = uf_offset;
+
+   p->job->tba_addr = tba_addr;
+   p->job->tma_addr = tma_addr;
+
kfree(chunk_array);
  
  	/* Use this opportunity to fill in task info for the vm */

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 26127c7d2f32..1e703119e4c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -88,9 +88,10 @@
   * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
   * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
   * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
+ * - 3.40.0 - Add AMDGPU_CHUNK_ID_TRAP
   */
  #define KMS_DRIVER_MAJOR  3
-#define KMS_DRIVER_MINOR   39
+#define KMS_DRIVER_MINOR   40
  #define KMS_DRIVER_PATCHLEVEL 0
  
  int amdgpu_vram_limit = 0;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index 8e58325bbca2..fd0d56724b4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -58,6 +58,10 @@ struct amdgpu_vmid {
uint32_toa_base;
uint32_toa_size;
  
+	/* 

[PATCH v2] drm/amdgpu: add support for user trap handlers

2020-08-25 Thread Samuel Pitoiset
A trap handler can be used by userspace to catch shader exceptions
like divide by zero, memory violations etc.

On GFX6-GFX8, the registers used to configure TBA/TMA aren't
privileged and can be configured from userpace.

On GFX9+ they are per VMID and privileged, only the KMD can
configure them. At the moment, we don't know how to set them
via the CP, so they are only emitted if a VMID is reserved.

This introduces a new CS chunk that can be used to set the
TBA/TMA virtual address at submit time.

TODO:
- rebase on top of amd-staging-drm-next (this branch currently
hangs my GPU at boot)

Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 31 
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h  |  4 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  4 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  4 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 20 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 20 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 19 +++
 include/uapi/drm/amdgpu_drm.h|  8 ++
 9 files changed, 110 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index a512ccbc4dea..6ca5c4912e3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -104,6 +104,19 @@ static int amdgpu_cs_bo_handles_chunk(struct 
amdgpu_cs_parser *p,
return r;
 }
 
+static int amdgpu_cs_user_trap_chunk(struct amdgpu_cs_parser *p,
+struct drm_amdgpu_cs_chunk_trap *data,
+uint64_t *tba_addr, uint64_t *tma_addr)
+{
+   if (!data->tba_addr || !data->tma_addr)
+   return -EINVAL;
+
+   *tba_addr = data->tba_addr;
+   *tma_addr = data->tma_addr;
+
+   return 0;
+}
+
 static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union 
drm_amdgpu_cs *cs)
 {
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
@@ -112,6 +125,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser 
*p, union drm_amdgpu_cs
uint64_t *chunk_array;
unsigned size, num_ibs = 0;
uint32_t uf_offset = 0;
+   uint64_t tba_addr = 0, tma_addr = 0;
int i;
int ret;
 
@@ -214,6 +228,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser 
*p, union drm_amdgpu_cs
 
break;
 
+   case AMDGPU_CHUNK_ID_TRAP:
+   size = sizeof(struct drm_amdgpu_cs_chunk_trap);
+   if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+   ret = -EINVAL;
+   goto free_partial_kdata;
+   }
+
+   ret = amdgpu_cs_user_trap_chunk(p, p->chunks[i].kdata,
+   _addr, _addr);
+   if (ret)
+   goto free_partial_kdata;
+   break;
+
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -239,6 +266,10 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser 
*p, union drm_amdgpu_cs
 
if (p->uf_entry.tv.bo)
p->job->uf_addr = uf_offset;
+
+   p->job->tba_addr = tba_addr;
+   p->job->tma_addr = tma_addr;
+
kfree(chunk_array);
 
/* Use this opportunity to fill in task info for the vm */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 26127c7d2f32..1e703119e4c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -88,9 +88,10 @@
  * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
  * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
  * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
+ * - 3.40.0 - Add AMDGPU_CHUNK_ID_TRAP
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   39
+#define KMS_DRIVER_MINOR   40
 #define KMS_DRIVER_PATCHLEVEL  0
 
 int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index 8e58325bbca2..fd0d56724b4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -58,6 +58,10 @@ struct amdgpu_vmid {
uint32_toa_base;
uint32_toa_size;
 
+   /* user trap */
+   uint64_ttba_addr;
+   uint64_ttma_addr;
+
unsignedpasid;
struct dma_fence*pasid_mapping;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 81caac9b958a..b8ed5b13ea44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++