RE: [PATCH] Revert "drm/amd/scheduler:fix duplicate operation in entity fini"

2017-11-01 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Wednesday, November 01, 2017 11:33 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH] Revert "drm/amd/scheduler:fix duplicate operation in
> entity fini"
> 
> fix memory leak.
> This reverts commit d6951b49faa8447a6a77cdb1ef3346b1a1786d31.
> because when entity_fini is interrupted the jobs in queue still
> not processed with job_begin, so the finish_cb is not hooked
> on sched fence, we still need manually do cleanups.
> 
> Change-Id: I6e17bfeeac85062bc52f1d51b9697852b084845c

Missing your signed-off-by.

Alex

> ---
>  drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> index 6f041e8..7aa6455 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> @@ -231,10 +231,11 @@ void amd_sched_entity_fini(struct
> amd_gpu_scheduler *sched,
>   kthread_unpark(sched->thread);
>   while ((job = to_amd_sched_job(spsc_queue_pop(
> >job_queue {
>   struct amd_sched_fence *s_fence = job->s_fence;
> -
>   amd_sched_fence_scheduled(s_fence);
>   dma_fence_set_error(_fence->finished, -ESRCH);
>   amd_sched_fence_finished(s_fence);
> + dma_fence_put(_fence->finished);
> + sched->ops->free_job(job);
>   }
>   }
>  }
> --
> 2.7.4
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] Revert "drm/amd/scheduler:fix duplicate operation in entity fini"

2017-11-01 Thread Monk Liu
fix memory leak.
This reverts commit d6951b49faa8447a6a77cdb1ef3346b1a1786d31.
because when entity_fini is interrupted the jobs in queue still
not processed with job_begin, so the finish_cb is not hooked
on sched fence, we still need manually do cleanups.

Change-Id: I6e17bfeeac85062bc52f1d51b9697852b084845c
---
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c 
b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 6f041e8..7aa6455 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -231,10 +231,11 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler 
*sched,
kthread_unpark(sched->thread);
while ((job = 
to_amd_sched_job(spsc_queue_pop(>job_queue {
struct amd_sched_fence *s_fence = job->s_fence;
-
amd_sched_fence_scheduled(s_fence);
dma_fence_set_error(_fence->finished, -ESRCH);
amd_sched_fence_finished(s_fence);
+   dma_fence_put(_fence->finished);
+   sched->ops->free_job(job);
}
}
 }
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/3] drm/amdgpu: wrap allocation for amdgpu_device

2017-11-01 Thread Pixel Ding
From: pding 

Add amdgpu_device_alloc() which was part of previous
amdgpu_device_init(). Then it's flexible to handle init
sequence since kfd has dependency to amdgpu_device base
fields.

Signed-off-by: pding 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  4 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 54 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 18 +++---
 3 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2730a75..6f2dc2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1655,11 +1655,11 @@ static inline struct amdgpu_device 
*amdgpu_ttm_adev(struct ttm_bo_device *bdev)
 {
return container_of(bdev, struct amdgpu_device, mman.bdev);
 }
-
-int amdgpu_device_init(struct amdgpu_device *adev,
+int amdgpu_device_alloc(struct amdgpu_device **padev,
   struct drm_device *ddev,
   struct pci_dev *pdev,
   uint32_t flags);
+int amdgpu_device_init(struct amdgpu_device *adev);
 void amdgpu_device_fini(struct amdgpu_device *adev);
 int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 068b56a..809e656 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2092,25 +2092,33 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device 
*adev)
 }
 
 /**
- * amdgpu_device_init - initialize the driver
+ * amdgpu_device_alloc - allocate/init amdgpu_device structure
  *
- * @adev: amdgpu_device pointer
+ * @padev: pointer to amdgpu_device pointer
  * @pdev: drm dev pointer
  * @pdev: pci dev pointer
  * @flags: driver flags
  *
- * Initializes the driver info and hw (all asics).
  * Returns 0 for success or an error on failure.
- * Called at driver startup.
  */
-int amdgpu_device_init(struct amdgpu_device *adev,
-  struct drm_device *ddev,
-  struct pci_dev *pdev,
-  uint32_t flags)
+int amdgpu_device_alloc(struct amdgpu_device **padev,
+   struct drm_device *ddev,
+   struct pci_dev *pdev,
+   uint32_t flags)
 {
-   int r, i;
-   bool runtime = false;
-   u32 max_MBps;
+   struct amdgpu_device *adev;
+
+   adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
+   if (adev == NULL)
+   return -ENOMEM;
+
+   if ((amdgpu_runtime_pm != 0) &&
+   amdgpu_has_atpx() &&
+   (amdgpu_is_atpx_hybrid() ||
+amdgpu_has_atpx_dgpu_power_cntl()) &&
+   ((flags & AMD_IS_APU) == 0) &&
+   !pci_is_thunderbolt_attached(pdev))
+   flags |= AMD_IS_PX;
 
adev->shutdown = false;
adev->dev = >dev;
@@ -2183,6 +2191,28 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
INIT_DELAYED_WORK(>late_init_work, amdgpu_late_init_func_handler);
 
+   *padev = adev;
+
+   return 0;
+}
+/**
+ * amdgpu_device_init - initialize the driver
+ *
+ * @adev: amdgpu_device pointer
+ * @pdev: drm dev pointer
+ * @pdev: pci dev pointer
+ * @flags: driver flags
+ *
+ * Initializes the driver info and hw (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver startup.
+ */
+int amdgpu_device_init(struct amdgpu_device *adev)
+{
+   int r, i;
+   bool runtime = false;
+   u32 max_MBps;
+
/* Registers mapping */
/* TODO: block userspace mapping of io register */
if (adev->asic_type >= CHIP_BONAIRE) {
@@ -2226,7 +2256,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
if (amdgpu_runtime_pm == 1)
runtime = true;
-   if (amdgpu_device_is_px(ddev))
+   if (amdgpu_device_is_px(adev->ddev))
runtime = true;
if (!pci_is_thunderbolt_attached(adev->pdev))
vga_switcheroo_register_client(adev->pdev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 3e9760d..acdb010 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -124,19 +124,11 @@ int amdgpu_driver_load_kms(struct drm_device *dev, 
unsigned long flags)
 #endif
 retry_init:
 
-   adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
-   if (adev == NULL) {
-   return -ENOMEM;
-   }
-   dev->dev_private = (void *)adev;
+   r = amdgpu_device_alloc(, dev, dev->pdev, flags);
+   if (r)
+   return r;
 
-   if ((amdgpu_runtime_pm != 0) &&
-   amdgpu_has_atpx() &&
-   (amdgpu_is_atpx_hybrid() ||
-amdgpu_has_atpx_dgpu_power_cntl()) &&
-   ((flags & AMD_IS_APU) == 0) &&
-   

release ex mode after hw_init if no KIQ

2017-11-01 Thread Pixel Ding
Hi Oded,

There're 3 patches for releasing exclusive mode after hw_init if
KIQ is not enabled.

[PATCH 1/3] drm/amdgpu: wrap allocation for amdgpu_device
Allocation of amdgpu_device and base fields are wrapped put it ahead.

[PATCH 2/3] drm/amdgpu: release exclusive mode after hw_init if no

[PATCH 3/3] drm/amdkfd: pass kgd to kfd inside device_init
Still don't pass kgd into probe callback in case of that it's used
unsafely.

Please help review. Thanks very much!
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 6/8] drm/amdgpu: Implement amdgpu SDMA functions for VI

2017-11-01 Thread Felix Kuehling
From: Philip Cox 

Signed-off-by: Philip Cox 
Signed-off-by: shaoyun liu 
Signed-off-by: Yong Zhao 
Signed-off-by: Jay Cornwall 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 85 ---
 drivers/gpu/drm/amd/amdgpu/vid.h  |  2 +
 drivers/gpu/drm/amd/include/vi_structs.h  |  2 +
 3 files changed, 78 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 03c564d..1d989e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -45,7 +45,7 @@ enum hqd_dequeue_request_type {
RESET_WAVES
 };
 
-struct cik_sdma_rlc_registers;
+struct vi_sdma_mqd;
 
 /*
  * Register access functions
@@ -269,9 +269,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, 
uint32_t pipe_id)
return 0;
 }
 
-static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
+static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
 {
-   return 0;
+   uint32_t retval;
+
+   retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
+   m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
+   pr_debug("kfd: sdma base address: 0x%x\n", retval);
+
+   return retval;
 }
 
 static inline struct vi_mqd *get_mqd(void *mqd)
@@ -279,9 +285,9 @@ static inline struct vi_mqd *get_mqd(void *mqd)
return (struct vi_mqd *)mqd;
 }
 
-static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
 {
-   return (struct cik_sdma_rlc_registers *)mqd;
+   return (struct vi_sdma_mqd *)mqd;
 }
 
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
@@ -362,6 +368,63 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
 uint32_t __user *wptr, struct mm_struct *mm)
 {
+   struct amdgpu_device *adev = get_amdgpu_device(kgd);
+   struct vi_sdma_mqd *m;
+   unsigned long end_jiffies;
+   uint32_t sdma_base_addr;
+   uint32_t data;
+
+   m = get_sdma_mqd(mqd);
+   sdma_base_addr = get_sdma_base_addr(m);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+   m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+   end_jiffies = msecs_to_jiffies(2000) + jiffies;
+   while (true) {
+   data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+   if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+   break;
+   if (time_after(jiffies, end_jiffies))
+   return -ETIME;
+   usleep_range(500, 1000);
+   }
+   if (m->sdma_engine_id) {
+   data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
+   data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
+   RESUME_CTX, 0);
+   WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
+   } else {
+   data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
+   data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
+   RESUME_CTX, 0);
+   WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
+   }
+
+   data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ENABLE, 1);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+
+   if (read_user_wptr(mm, wptr, data))
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+   else
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+  m->sdmax_rlcx_rb_rptr);
+
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
+   m->sdmax_rlcx_virtual_addr);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+   m->sdmax_rlcx_rb_base_hi);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+   m->sdmax_rlcx_rb_rptr_addr_lo);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+   m->sdmax_rlcx_rb_rptr_addr_hi);
+
+   data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+RB_ENABLE, 1);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
return 0;
 }
 
@@ -390,7 +453,7 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, 
uint64_t queue_address,
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
-   struct 

[PATCH 8/8] drm/amdkfd: Implement amdkfd SDMA functions for VI

2017-11-01 Thread Felix Kuehling
From: Philip Cox 

Signed-off-by: Philip Cox 
Signed-off-by: shaoyun liu 
Signed-off-by: Jay Cornwall 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 103 +++-
 1 file changed, 102 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index dc92497..a117d2b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -30,7 +30,7 @@
 #include "vi_structs.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_enum.h"
-
+#include "oss/oss_3_0_sh_mask.h"
 #define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8
 
 static inline struct vi_mqd *get_mqd(void *mqd)
@@ -239,6 +239,101 @@ static int update_mqd_hiq(struct mqd_manager *mm, void 
*mqd,
return retval;
 }
 
+static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+   struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+   struct queue_properties *q)
+{
+   int retval;
+   struct vi_sdma_mqd *m;
+
+
+   retval = kfd_gtt_sa_allocate(mm->dev,
+   sizeof(struct vi_sdma_mqd),
+   mqd_mem_obj);
+
+   if (retval != 0)
+   return -ENOMEM;
+
+   m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
+
+   memset(m, 0, sizeof(struct vi_sdma_mqd));
+
+   *mqd = m;
+   if (gart_addr != NULL)
+   *gart_addr = (*mqd_mem_obj)->gpu_addr;
+
+   retval = mm->update_mqd(mm, m, q);
+
+   return retval;
+}
+
+static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
+   struct kfd_mem_obj *mqd_mem_obj)
+{
+   kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+
+static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+   uint32_t pipe_id, uint32_t queue_id,
+   struct queue_properties *p, struct mm_struct *mms)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+  (uint32_t __user *)p->write_ptr,
+  mms);
+}
+
+static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+   struct queue_properties *q)
+{
+   struct vi_sdma_mqd *m;
+
+   m = get_sdma_mqd(mqd);
+   m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+   << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+   q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
+   1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+   6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
+
+   m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+   m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+   m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+   m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+   m->sdmax_rlcx_doorbell =
+   q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
+
+   m->sdmax_rlcx_virtual_addr = q->sdma_vm_addr;
+
+   m->sdma_engine_id = q->sdma_engine_id;
+   m->sdma_queue_id = q->sdma_queue_id;
+
+   q->is_active = (q->queue_size > 0 &&
+   q->queue_address != 0 &&
+   q->queue_percent > 0);
+
+   return 0;
+}
+
+/*
+ *  * preempt type here is ignored because there is only one way
+ *  * to preempt sdma queue
+ */
+static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_type type,
+   unsigned int timeout, uint32_t pipe_id,
+   uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+}
+
+static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+   uint64_t queue_address, uint32_t pipe_id,
+   uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+}
+
+
+
 struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
struct kfd_dev *dev)
 {
@@ -272,6 +367,12 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE 
type,
mqd->is_occupied = is_occupied;
break;
case KFD_MQD_TYPE_SDMA:
+   mqd->init_mqd = init_mqd_sdma;
+   mqd->uninit_mqd = uninit_mqd_sdma;
+   mqd->load_mqd = load_mqd_sdma;
+   mqd->update_mqd = update_mqd_sdma;
+   mqd->destroy_mqd = destroy_mqd_sdma;
+   mqd->is_occupied = is_occupied_sdma;
break;
default:
kfree(mqd);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 5/8] drm/amdgpu: Add support for resuming SDMA queues w/o HWS

2017-11-01 Thread Felix Kuehling
Save wptr in hqd_sdma_destroy, restore it in hqd_sdma_load. Also
read updated wptr from user mode when resuming an SDMA queue.

Signed-off-by: Jay Cornwall 
Signed-off-by: Yong Zhao 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 30 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |  9 ---
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index a55d794..14333af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -412,10 +412,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void 
*mqd,
WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
}
 
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL,
-   m->sdma_rlc_doorbell);
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
+   data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL,
+ENABLE, 1);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr);
+
+   if (read_user_wptr(mm, wptr, data))
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+   else
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+  m->sdma_rlc_rb_rptr);
+
WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
m->sdma_rlc_virtual_addr);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
@@ -425,8 +432,10 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void 
*mqd,
m->sdma_rlc_rb_rptr_addr_lo);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
m->sdma_rlc_rb_rptr_addr_hi);
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-   m->sdma_rlc_rb_cntl);
+
+   data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL,
+RB_ENABLE, 1);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
 
return 0;
 }
@@ -577,7 +586,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void 
*mqd,
struct cik_sdma_rlc_registers *m;
uint32_t sdma_base_addr;
uint32_t temp;
-   int timeout = utimeout;
+   unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
 
m = get_sdma_mqd(mqd);
sdma_base_addr = get_sdma_base_addr(m);
@@ -590,10 +599,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void 
*mqd,
temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
break;
-   if (timeout <= 0)
+   if (time_after(jiffies, end_jiffies))
return -ETIME;
-   msleep(20);
-   timeout -= 20;
+   usleep_range(500, 1000);
}
 
WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
@@ -601,6 +609,8 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void 
*mqd,
RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
 
+   m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 1017ff5..03c564d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -514,7 +514,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void 
*mqd,
struct cik_sdma_rlc_registers *m;
uint32_t sdma_base_addr;
uint32_t temp;
-   int timeout = utimeout;
+   unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
 
m = get_sdma_mqd(mqd);
sdma_base_addr = get_sdma_base_addr(m);
@@ -527,10 +527,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void 
*mqd,
temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
break;
-   if (timeout <= 0)
+   if (time_after(jiffies, end_jiffies))
return -ETIME;
-   msleep(20);
-   timeout -= 20;
+   usleep_range(500, 1000);
}
 
WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
@@ -538,6 +537,8 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void 
*mqd,
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);
 
+   m->sdmax_rlcx_rb_rptr = 

[PATCH 1/8] drm/amdgpu: Correct SDMA load/unload sequence on HWS disabled mode

2017-11-01 Thread Felix Kuehling
Fix the SDMA load and unload sequence as suggested by HW document.

Signed-off-by: shaoyun liu 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 47 ---
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 47d1c13..1e3e9be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -379,29 +379,50 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void 
*mqd)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
+   unsigned long end_jiffies;
uint32_t sdma_base_addr;
+   uint32_t data;
 
m = get_sdma_mqd(mqd);
sdma_base_addr = get_sdma_base_addr(m);
 
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
-   m->sdma_rlc_virtual_addr);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+   m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
 
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE,
-   m->sdma_rlc_rb_base);
+   end_jiffies = msecs_to_jiffies(2000) + jiffies;
+   while (true) {
+   data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+   if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+   break;
+   if (time_after(jiffies, end_jiffies))
+   return -ETIME;
+   usleep_range(500, 1000);
+   }
+   if (m->sdma_engine_id) {
+   data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
+   data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
+   RESUME_CTX, 0);
+   WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
+   } else {
+   data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
+   data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
+   RESUME_CTX, 0);
+   WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
+   }
 
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL,
+   m->sdma_rlc_doorbell);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
+   m->sdma_rlc_virtual_addr);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
m->sdma_rlc_rb_base_hi);
-
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
m->sdma_rlc_rb_rptr_addr_lo);
-
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
m->sdma_rlc_rb_rptr_addr_hi);
-
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL,
-   m->sdma_rlc_doorbell);
-
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
m->sdma_rlc_rb_cntl);
 
@@ -574,9 +595,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void 
*mqd,
}
 
WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);
+   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+   RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+   SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
 
return 0;
 }
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/8] drm/amdkfd: Correct SDMA ring buffer size

2017-11-01 Thread Felix Kuehling
From: shaoyunl 

ffs function return the position of the first bit set on 1 based.
(bit zero returns 1).

Signed-off-by: shaoyun liu 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 4859d26..4728fad 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -202,8 +202,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void 
*mqd,
struct cik_sdma_rlc_registers *m;
 
m = get_sdma_mqd(mqd);
-   m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) <<
-   SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+   m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+   << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 7/8] drm/amdkfd: Use ASIC-specific SDMA MQD type

2017-11-01 Thread Felix Kuehling
Signed-off-by: shaoyun liu 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 13 +
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  |  5 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  2 --
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index ea02bfa..9873929 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -36,6 +36,11 @@ static inline struct cik_mqd *get_mqd(void *mqd)
return (struct cik_mqd *)mqd;
 }
 
+static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+{
+   return (struct cik_sdma_rlc_registers *)mqd;
+}
+
 static int init_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -362,14 +367,6 @@ static int update_mqd_hiq(struct mqd_manager *mm, void 
*mqd,
return 0;
 }
 
-struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
-{
-   struct cik_sdma_rlc_registers *m;
-
-   m = (struct cik_sdma_rlc_registers *)mqd;
-
-   return m;
-}
 
 struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
struct kfd_dev *dev)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 4ea854f..dc92497 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -38,6 +38,11 @@ static inline struct vi_mqd *get_mqd(void *mqd)
return (struct vi_mqd *)mqd;
 }
 
+static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+   return (struct vi_sdma_mqd *)mqd;
+}
+
 static int init_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 9e4134c..4750473 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -643,8 +643,6 @@ int kgd2kfd_resume(struct kfd_dev *kfd);
 int kfd_init_apertures(struct kfd_process *process);
 
 /* Queue Context Management */
-struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd);
-
 int init_queue(struct queue **q, const struct queue_properties *properties);
 void uninit_queue(struct queue *q);
 void print_queue_properties(struct queue_properties *q);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/8] drm/amdkfd: Fix SDMA oversubsription handling

2017-11-01 Thread Felix Kuehling
SDMA only supports a fixed number of queues. HWS cannot handle
oversubscription.

Signed-off-by: shaoyun liu 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 2bec902..a3f1e62 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -191,6 +191,24 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 
switch (type) {
case KFD_QUEUE_TYPE_SDMA:
+   if (dev->dqm->queue_count >=
+   CIK_SDMA_QUEUES_PER_ENGINE * CIK_SDMA_ENGINE_NUM) {
+   pr_err("Over-subscription is not allowed for SDMA.\n");
+   retval = -EPERM;
+   goto err_create_queue;
+   }
+
+   retval = create_cp_queue(pqm, dev, , properties, f, *qid);
+   if (retval != 0)
+   goto err_create_queue;
+   pqn->q = q;
+   pqn->kq = NULL;
+   retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd,
+   >properties.vmid);
+   pr_debug("DQM returned %d for create_queue\n", retval);
+   print_queue(q);
+   break;
+
case KFD_QUEUE_TYPE_COMPUTE:
/* check if there is over subscription */
if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) 
&&
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 8/8] drm/amdkfd: Minor cleanups v2

2017-11-01 Thread Felix Kuehling
These were missed previously when rebasing changes for upstreaming.

v2: Remove redundant sched_policy conditions

Signed-off-by: Felix Kuehling 
Reviewed-by: Oded Gabbay 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 10 --
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e2fc4c5..e202921 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -389,12 +389,11 @@ static int update_queue(struct device_queue_manager *dqm, 
struct queue *q)
if (sched_policy != KFD_SCHED_POLICY_NO_HWS) {
retval = unmap_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
-   if (retval != 0) {
+   if (retval) {
pr_err("unmap queue failed\n");
goto out_unlock;
}
-   } else if (sched_policy == KFD_SCHED_POLICY_NO_HWS &&
-  prev_active &&
+   } else if (prev_active &&
   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
retval = mqd->destroy_mqd(mqd, q->mqd,
@@ -421,8 +420,7 @@ static int update_queue(struct device_queue_manager *dqm, 
struct queue *q)
 
if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
retval = map_queues_cpsch(dqm);
-   else if (sched_policy == KFD_SCHED_POLICY_NO_HWS &&
-q->properties.is_active &&
+   else if (q->properties.is_active &&
 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
  q->properties.type == KFD_QUEUE_TYPE_SDMA))
retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
@@ -832,7 +830,7 @@ static int create_queue_cpsch(struct device_queue_manager 
*dqm, struct queue *q,
 
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
retval = allocate_sdma_queue(dqm, >sdma_id);
-   if (retval != 0)
+   if (retval)
goto out;
q->properties.sdma_queue_id =
q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 78b5d61..9e4134c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -507,6 +507,8 @@ struct kfd_process {
 * In any process, the thread that started main() is the lead
 * thread and outlives the rest.
 * It is here because amd_iommu_bind_pasid wants a task_struct.
+* It can also be used for safely getting a reference to the
+* mm_struct of the process.
 */
struct task_struct *lead_thread;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index db08f8f..1f5ccd28 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -416,7 +416,7 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev)
err = amd_iommu_bind_pasid(dev->pdev, p->pasid,
p->lead_thread);
if (err < 0) {
-   pr_err("unexpected pasid %d binding failure\n",
+   pr_err("Unexpected pasid %d binding failure\n",
p->pasid);
mutex_unlock(>mutex);
break;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 0/8] KFD SDMA support for GFX7 and GFX8 v2

2017-11-01 Thread Felix Kuehling
This patch series fixes SDMA user mode queue support for GFX7 and adds
support for GFX8.

v2: Rebased. radeon_kfd.c doesn't exist any more.

Felix Kuehling (5):
  drm/amdgpu: Correct SDMA load/unload sequence on HWS disabled mode
  drm/amdkfd: Fix SDMA oversubsription handling
  drm/amd: Update kgd_kfd interface for resuming SDMA queues
  drm/amdgpu: Add support for resuming SDMA queues w/o HWS
  drm/amdkfd: Use ASIC-specific SDMA MQD type

Philip Cox (2):
  drm/amdgpu: Implement amdgpu SDMA functions for VI
  drm/amdkfd: Implement amdkfd SDMA functions for VI

shaoyunl (1):
  drm/amdkfd: Correct SDMA ring buffer size

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c  |  73 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c  | 100 +++
 drivers/gpu/drm/amd/amdgpu/vid.h   |   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c   |  21 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c| 108 -
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h  |   2 -
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |  18 
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h|   3 +-
 drivers/gpu/drm/amd/include/vi_structs.h   |   2 +
 9 files changed, 277 insertions(+), 52 deletions(-)

-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 4/8] drm/amd: Update kgd_kfd interface for resuming SDMA queues

2017-11-01 Thread Felix Kuehling
Add wptr and mm parameters to hqd_sdma_load and pass these parameters
from device_queue_manager through the mqd_manager.

SDMA doesn't support polling while the engine believes it's idle. The
driver must update the wptr. The new parameters will be used for looking
up the updated value from the specified mm when SDMA queues are resumed
after being disabled.

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 6 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 6 --
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  | 4 +++-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h   | 3 ++-
 4 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 1e3e9be..a55d794 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -105,7 +105,8 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+uint32_t __user *wptr, struct mm_struct *mm);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id);
 
@@ -375,7 +376,8 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
return 0;
 }
 
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+uint32_t __user *wptr, struct mm_struct *mm)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 056929b..1017ff5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -64,7 +64,8 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+uint32_t __user *wptr, struct mm_struct *mm);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id);
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
@@ -358,7 +359,8 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
return 0;
 }
 
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+uint32_t __user *wptr, struct mm_struct *mm)
 {
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 4728fad..ea02bfa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -160,7 +160,9 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
 uint32_t pipe_id, uint32_t queue_id,
 struct queue_properties *p, struct mm_struct *mms)
 {
-   return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd);
+   return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+  (uint32_t __user *)p->write_ptr,
+  mms);
 }
 
 static int update_mqd(struct mqd_manager *mm, void *mqd,
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h 
b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index f516fd1..c6d4e64 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -184,7 +184,8 @@ struct kfd2kgd_calls {
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm);
 
-   int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd);
+   int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd,
+uint32_t __user *wptr, struct mm_struct *mm);
 
bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org

[PATCH 4/8] drm/amdkfd: Fix debug unregister procedure on process termination v2

2017-11-01 Thread Felix Kuehling
From: Yair Shachar 

Take the dbgmgr lock and unregister before destroying the debug manager.
Do this before destroying the queues.

v2: Correct locking order in kfd_ioctl_dbg_register to ake sure the
process mutex and dbgmgr mutex are always taken in the same order.

Signed-off-by: Yair Shachar 
Signed-off-by: Felix Kuehling 
Reviewed-by: Oded Gabbay 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 37 +++-
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index a25321f..505d391 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -450,8 +450,8 @@ static int kfd_ioctl_dbg_register(struct file *filep,
return -EINVAL;
}
 
-   mutex_lock(kfd_get_dbgmgr_mutex());
mutex_lock(>mutex);
+   mutex_lock(kfd_get_dbgmgr_mutex());
 
/*
 * make sure that we have pdd, if this the first queue created for
@@ -479,8 +479,8 @@ static int kfd_ioctl_dbg_register(struct file *filep,
}
 
 out:
-   mutex_unlock(>mutex);
mutex_unlock(kfd_get_dbgmgr_mutex());
+   mutex_unlock(>mutex);
 
return status;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index b81ad816..db08f8f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -224,17 +224,26 @@ static void kfd_process_notifier_release(struct 
mmu_notifier *mn,
 
mutex_lock(>mutex);
 
+   /* Iterate over all process device data structures and if the
+* pdd is in debug mode, we should first force unregistration,
+* then we will be able to destroy the queues
+*/
+   list_for_each_entry(pdd, >per_device_data, per_device_list) {
+   struct kfd_dev *dev = pdd->dev;
+
+   mutex_lock(kfd_get_dbgmgr_mutex());
+   if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
+   if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
+   kfd_dbgmgr_destroy(dev->dbgmgr);
+   dev->dbgmgr = NULL;
+   }
+   }
+   mutex_unlock(kfd_get_dbgmgr_mutex());
+   }
+
kfd_process_dequeue_from_all_devices(p);
pqm_uninit(>pqm);
 
-   /* Iterate over all process device data structure and check
-* if we should delete debug managers
-*/
-   list_for_each_entry(pdd, >per_device_data, per_device_list)
-   if ((pdd->dev->dbgmgr) &&
-   (pdd->dev->dbgmgr->pasid == p->pasid))
-   kfd_dbgmgr_destroy(pdd->dev->dbgmgr);
-
mutex_unlock(>mutex);
 
/*
@@ -463,8 +472,16 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev 
*dev, unsigned int pasid)
 
pr_debug("Unbinding process %d from IOMMU\n", pasid);
 
-   if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
-   kfd_dbgmgr_destroy(dev->dbgmgr);
+   mutex_lock(kfd_get_dbgmgr_mutex());
+
+   if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
+   if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
+   kfd_dbgmgr_destroy(dev->dbgmgr);
+   dev->dbgmgr = NULL;
+   }
+   }
+
+   mutex_unlock(kfd_get_dbgmgr_mutex());
 
pdd = kfd_get_process_device_data(dev, p);
if (pdd)
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 6/8] drm/amdkfd: Cleanup DQM ASIC-specific ops

2017-11-01 Thread Felix Kuehling
From: Yong Zhao 

Remove empty initialize function.

Rename register_process to update_qpd to avoid confusion with the
non-ASIC-specific register_process.

Shorten ops_asic_specific to asic_ops.

Signed-off-by: Yong Zhao 
Signed-off-by: Felix Kuehling 
Reviewed-by: Oded Gabbay 
---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c| 19 +++
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h| 11 ++-
 .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c| 20 +++-
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 20 +++-
 4 files changed, 27 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index da3b743..45b98dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -467,7 +467,7 @@ static int register_process(struct device_queue_manager 
*dqm,
mutex_lock(>lock);
list_add(>list, >queues);
 
-   retval = dqm->ops_asic_specific.register_process(dqm, qpd);
+   retval = dqm->asic_ops.update_qpd(dqm, qpd);
 
dqm->processes_count++;
 
@@ -629,7 +629,7 @@ static int create_sdma_queue_nocpsch(struct 
device_queue_manager *dqm,
pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
 
-   dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
+   dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
retval = mqd->init_mqd(mqd, >mqd, >mqd_mem_obj,
>gart_mqd_addr, >properties);
if (retval)
@@ -696,8 +696,6 @@ static int set_sched_resources(struct device_queue_manager 
*dqm)
 
 static int initialize_cpsch(struct device_queue_manager *dqm)
 {
-   int retval;
-
pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
 
mutex_init(>lock);
@@ -706,11 +704,8 @@ static int initialize_cpsch(struct device_queue_manager 
*dqm)
dqm->sdma_queue_count = 0;
dqm->active_runlist = false;
dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
-   retval = dqm->ops_asic_specific.initialize(dqm);
-   if (retval)
-   mutex_destroy(>lock);
 
-   return retval;
+   return 0;
 }
 
 static int start_cpsch(struct device_queue_manager *dqm)
@@ -850,7 +845,7 @@ static int create_queue_cpsch(struct device_queue_manager 
*dqm, struct queue *q,
goto out;
}
 
-   dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
+   dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
retval = mqd->init_mqd(mqd, >mqd, >mqd_mem_obj,
>gart_mqd_addr, >properties);
if (retval)
@@ -1095,7 +1090,7 @@ static bool set_cache_memory_policy(struct 
device_queue_manager *dqm,
qpd->sh_mem_ape1_limit = limit >> 16;
}
 
-   retval = dqm->ops_asic_specific.set_cache_memory_policy(
+   retval = dqm->asic_ops.set_cache_memory_policy(
dqm,
qpd,
default_policy,
@@ -1270,11 +1265,11 @@ struct device_queue_manager 
*device_queue_manager_init(struct kfd_dev *dev)
 
switch (dev->device_info->asic_family) {
case CHIP_CARRIZO:
-   device_queue_manager_init_vi(>ops_asic_specific);
+   device_queue_manager_init_vi(>asic_ops);
break;
 
case CHIP_KAVERI:
-   device_queue_manager_init_cik(>ops_asic_specific);
+   device_queue_manager_init_cik(>asic_ops);
break;
default:
WARN(1, "Unexpected ASIC family %u",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 31c2b1f..5b77cb6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -128,9 +128,8 @@ struct device_queue_manager_ops {
 };
 
 struct device_queue_manager_asic_ops {
-   int (*register_process)(struct device_queue_manager *dqm,
+   int (*update_qpd)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
-   int (*initialize)(struct device_queue_manager *dqm);
bool(*set_cache_memory_policy)(struct device_queue_manager *dqm,
   struct qcm_process_device *qpd,
   enum cache_policy default_policy,
@@ -156,7 +155,7 @@ struct device_queue_manager_asic_ops {
 
 struct device_queue_manager {
struct device_queue_manager_ops ops;
-   struct device_queue_manager_asic_ops ops_asic_specific;
+   struct device_queue_manager_asic_ops asic_ops;
 
struct mqd_manager  

[PATCH 5/8] drm/amdkfd: Register/Deregister process on qpd resolution

2017-11-01 Thread Felix Kuehling
From: Ben Goz 

Process registration needs to happen on each device. So use per-device
queue lists to determine when to register/deregister the process.

Signed-off-by: Ben Goz 
Signed-off-by: Felix Kuehling 
Reviewed-by: Oded Gabbay 
---
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 5129dc1..2bec902 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -177,7 +177,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if (retval != 0)
return retval;
 
-   if (list_empty(>queues)) {
+   if (list_empty(>qpd.queues_list) &&
+   list_empty(>qpd.priv_queue_list)) {
pdd->qpd.pqm = pqm;
dev->dqm->ops.register_process(dev->dqm, >qpd);
}
@@ -248,7 +249,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 err_allocate_pqn:
/* check if queues list is empty unregister process from device */
clear_bit(*qid, pqm->queue_slot_bitmap);
-   if (list_empty(>queues))
+   if (list_empty(>qpd.queues_list) &&
+   list_empty(>qpd.priv_queue_list))
dev->dqm->ops.unregister_process(dev->dqm, >qpd);
return retval;
 }
@@ -302,7 +304,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, 
unsigned int qid)
kfree(pqn);
clear_bit(qid, pqm->queue_slot_bitmap);
 
-   if (list_empty(>queues))
+   if (list_empty(>qpd.queues_list) &&
+   list_empty(>qpd.priv_queue_list))
dqm->ops.unregister_process(dqm, >qpd);
 
return retval;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/8] drm/amdkfd: Avoid calling amd_iommu_unbind_pasid() when suspending

2017-11-01 Thread Felix Kuehling
From: Yong Zhao 

When kfd suspending on APU, we do not need to call
amd_iommu_unbind_pasid(), because pasid will be unbound automatically
when power goes off.

On the other hand, calling amd_iommu_unbind_pasid() will trigger
kfd_process_iommu_unbind_callback() if the process is not terminating.
By design, kfd_process_iommu_unbind_callback() should only be called
for process terminating. So we would rather not to call
amd_iommu_unbind_pasid() when suspending.

Signed-off-by: Yong Zhao 
Signed-off-by: Felix Kuehling 
Reviewed-by: Oded Gabbay 
---
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 14 +-
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 946f4d6..b81ad816 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -423,29 +423,25 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev)
 }
 
 /*
- * Temporarily unbind currently bound processes from the device and
- * mark them as PDD_BOUND_SUSPENDED. These processes will be restored
- * to PDD_BOUND state in kfd_bind_processes_to_device.
+ * Mark currently bound processes as PDD_BOUND_SUSPENDED. These
+ * processes will be restored to PDD_BOUND state in
+ * kfd_bind_processes_to_device.
  */
 void kfd_unbind_processes_from_device(struct kfd_dev *dev)
 {
struct kfd_process_device *pdd;
struct kfd_process *p;
-   unsigned int temp, temp_bound, temp_pasid;
+   unsigned int temp;
 
int idx = srcu_read_lock(_processes_srcu);
 
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
mutex_lock(>mutex);
pdd = kfd_get_process_device_data(dev, p);
-   temp_bound = pdd->bound;
-   temp_pasid = p->pasid;
+
if (pdd->bound == PDD_BOUND)
pdd->bound = PDD_BOUND_SUSPENDED;
mutex_unlock(>mutex);
-
-   if (temp_bound == PDD_BOUND)
-   amd_iommu_unbind_pasid(dev->pdev, temp_pasid);
}
 
srcu_read_unlock(_processes_srcu, idx);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/8] drm/amdkfd: Disable CP/SDMA ring/doorbell in MQD

2017-11-01 Thread Felix Kuehling
From: Jay Cornwall 

The MQD represents an inactive context and should not have ring or
doorbell enable bits set. Doing so interferes with HWS which streams
the MQD onto the HQD. If enable bits are set this activates the ring
or doorbell before the HQD is fully configured.

Signed-off-by: Jay Cornwall 
Signed-off-by: Felix Kuehling 
Reviewed-by: Oded Gabbay 
---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 34 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  |  7 ++---
 2 files changed, 11 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 44ffd23..4859d26 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -189,12 +189,9 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
if (q->format == KFD_QUEUE_FORMAT_AQL)
m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
 
-   q->is_active = false;
-   if (q->queue_size > 0 &&
+   q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
-   q->queue_percent > 0) {
-   q->is_active = true;
-   }
+   q->queue_percent > 0);
 
return 0;
 }
@@ -215,24 +212,17 @@ static int update_mqd_sdma(struct mqd_manager *mm, void 
*mqd,
m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8);
m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
-   m->sdma_rlc_doorbell = q->doorbell_off <<
-   SDMA0_RLC0_DOORBELL__OFFSET__SHIFT |
-   1 << SDMA0_RLC0_DOORBELL__ENABLE__SHIFT;
+   m->sdma_rlc_doorbell =
+   q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
 
m->sdma_rlc_virtual_addr = q->sdma_vm_addr;
 
m->sdma_engine_id = q->sdma_engine_id;
m->sdma_queue_id = q->sdma_queue_id;
 
-   q->is_active = false;
-   if (q->queue_size > 0 &&
+   q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
-   q->queue_percent > 0) {
-   m->sdma_rlc_rb_cntl |=
-   1 << SDMA0_RLC0_RB_CNTL__RB_ENABLE__SHIFT;
-
-   q->is_active = true;
-   }
+   q->queue_percent > 0);
 
return 0;
 }
@@ -359,19 +349,13 @@ static int update_mqd_hiq(struct mqd_manager *mm, void 
*mqd,
m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
-   m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
-   DOORBELL_OFFSET(q->doorbell_off);
+   m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off);
 
m->cp_hqd_vmid = q->vmid;
 
-   m->cp_hqd_active = 0;
-   q->is_active = false;
-   if (q->queue_size > 0 &&
+   q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
-   q->queue_percent > 0) {
-   m->cp_hqd_active = 1;
-   q->is_active = true;
-   }
+   q->queue_percent > 0);
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 73cbfe1..4ea854f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -163,12 +163,9 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
}
 
-   q->is_active = false;
-   if (q->queue_size > 0 &&
+   q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
-   q->queue_percent > 0) {
-   q->is_active = true;
-   }
+   q->queue_percent > 0);
 
return 0;
 }
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 0/8] More KFD fixes and cleanups v3

2017-11-01 Thread Felix Kuehling
Another pass through a diff between our internal branch and upstream
yielded a few more fixes and cleanups that were previously missed.

v2: Added a fix for a bug found when syncing upstreamed changes back
into our internal branch (Update queue_count before mapping queues).
Remove a redundant condition (Minor cleanups v2).

v3: Fixed locking order in "Fix debug unregister procedure on process
termination".

Ben Goz (1):
  drm/amdkfd: Register/Deregister process on qpd resolution

Felix Kuehling (2):
  drm/amdkfd: Update queue_count before mapping queues
  drm/amdkfd: Minor cleanups v2

Jay Cornwall (1):
  drm/amdkfd: Disable CP/SDMA ring/doorbell in MQD

Yair Shachar (1):
  drm/amdkfd: Fix debug unregister procedure on process termination v2

Yong Zhao (3):
  drm/amdkfd: Clean up the data structure in kfd_process
  drm/amdkfd: Avoid calling amd_iommu_unbind_pasid() when suspending
  drm/amdkfd: Cleanup DQM ASIC-specific ops

 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c   |  4 +-
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 49 +++
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  | 11 ++--
 .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c  | 20 +++---
 .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c   | 20 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c   | 34 +++
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c|  7 +--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h  |  8 +--
 drivers/gpu/drm/amd/amdkfd/kfd_process.c   | 71 ++
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |  9 ++-
 10 files changed, 96 insertions(+), 137 deletions(-)

-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH xf86-video-ati] modesetting: Check crtc before searching link-status property

2017-11-01 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Michel Dänzer
> Sent: Wednesday, November 01, 2017 1:43 PM
> To: amd-gfx@lists.freedesktop.org
> Subject: [PATCH xf86-video-ati] modesetting: Check crtc before searching
> link-status property
> 
> From: Daniel Martin 
> 
> No need to lookup the link-status property if we don't have a crtc.
> 
> Signed-off-by: Daniel Martin 
> (Ported from xserver commit 8d7f7e24261e68459e6f0a865e243473f65fe7ad)
> Signed-off-by: Michel Dänzer 

Both this and the amdgpu patch are:
Reviewed-by: Alex Deucher 

> ---
>  src/drmmode_display.c | 8 +++-
>  1 file changed, 3 insertions(+), 5 deletions(-)
> 
> diff --git a/src/drmmode_display.c b/src/drmmode_display.c
> index f57c43647..7ad3235a5 100644
> --- a/src/drmmode_display.c
> +++ b/src/drmmode_display.c
> @@ -2796,12 +2796,14 @@ radeon_mode_hotplug(ScrnInfoPtr scrn,
> drmmode_ptr drmmode)
>*/
>   for (i = 0; i < config->num_output; i++) {
>   xf86OutputPtr output = config->output[i];
> + xf86CrtcPtr crtc = output->crtc;
>   drmmode_output_private_ptr drmmode_output = output-
> >driver_private;
>   uint32_t con_id;
>   drmModeConnectorPtr koutput;
> 
> - if (drmmode_output->mode_output == NULL)
> + if (!crtc || !drmmode_output->mode_output)
>   continue;
> +
>   con_id = drmmode_output->mode_output->connector_id;
>   /* Get an updated view of the properties for the current
> connector and
>* look for the link-status property
> @@ -2813,10 +2815,6 @@ radeon_mode_hotplug(ScrnInfoPtr scrn,
> drmmode_ptr drmmode)
>   if (props && props->flags &
> DRM_MODE_PROP_ENUM &&
>   !strcmp(props->name, "link-status") &&
>   koutput->prop_values[j] ==
> DRM_MODE_LINK_STATUS_BAD) {
> - xf86CrtcPtr crtc = output->crtc;
> - if (!crtc)
> - continue;
> -
>   /* the connector got a link failure, re-set the
> current mode */
>   drmmode_set_mode_major(crtc, 
> >mode, crtc->rotation,
>  crtc->x, crtc->y);
> --
> 2.15.0.rc2
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu] modesetting: Check crtc before searching link-status property

2017-11-01 Thread Michel Dänzer
From: Daniel Martin 

No need to lookup the link-status property if we don't have a crtc.

Signed-off-by: Daniel Martin 
(Ported from xserver commit 8d7f7e24261e68459e6f0a865e243473f65fe7ad)
Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 4ca94e71d..1eea7 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -2599,12 +2599,14 @@ amdgpu_mode_hotplug(ScrnInfoPtr scrn, drmmode_ptr 
drmmode)
 */
for (i = 0; i < config->num_output; i++) {
xf86OutputPtr output = config->output[i];
+   xf86CrtcPtr crtc = output->crtc;
drmmode_output_private_ptr drmmode_output = 
output->driver_private;
uint32_t con_id;
drmModeConnectorPtr koutput;
 
-   if (drmmode_output->mode_output == NULL)
+   if (!crtc || !drmmode_output->mode_output)
continue;
+
con_id = drmmode_output->mode_output->connector_id;
/* Get an updated view of the properties for the current 
connector and
 * look for the link-status property
@@ -2616,10 +2618,6 @@ amdgpu_mode_hotplug(ScrnInfoPtr scrn, drmmode_ptr 
drmmode)
if (props && props->flags & DRM_MODE_PROP_ENUM &&
!strcmp(props->name, "link-status") &&
koutput->prop_values[j] == 
DRM_MODE_LINK_STATUS_BAD) {
-   xf86CrtcPtr crtc = output->crtc;
-   if (!crtc)
-   continue;
-
/* the connector got a link failure, re-set the 
current mode */
drmmode_set_mode_major(crtc, >mode, 
crtc->rotation,
   crtc->x, crtc->y);
-- 
2.15.0.rc2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: KASAN use-after-free report during piglit run

2017-11-01 Thread Michel Dänzer
On 01/11/17 09:47 AM, Christian König wrote:
> Am 31.10.2017 um 18:58 schrieb Michel Dänzer:
>> On 25/10/17 05:43 PM, Michel Dänzer wrote:
>>> KASAN caught another use-after-free on my development machine today, see
>>> the attached dmesg excerpt. There haven't been any related changes in
>>> amd-staging-drm-next since yesterday, so maybe userspace is just
>>> tickling the kernel differently (e.g. piglit runs some more tests in
>>> parallel now). It's not reproducible every time, but it just happened a
>>> second time (with an amd-staging-drm-next commit from about a week ago).
>> I took a closer look, and I think I see what's happening. The
>> use-after-free happens at:
>>
>>    reservation_object_wait_timeout_rcu+0xe02/0xe90
>>    ttm_bo_cleanup_refs_and_unlock+0x271/0x990 [ttm] (ttm_bo.c:530)
>>    ttm_mem_evict_first+0x263/0x4a0 [ttm]
>>
>> The memory was freed at:
>>
>>    [reservation_object_fini]
>>    ttm_bo_cleanup_refs_and_unlock+0x517/0x990 [ttm] (ttm_bo.c:564)
>>    ttm_mem_evict_first+0x263/0x4a0 [ttm]
>>
>> So it's two processes handling the same BO in ttm_mem_evict_first ->
>> ttm_bo_cleanup_refs_and_unlock. The first one unreserved the BO before
>> calling reservation_object_wait_timeout_rcu. Meanwhile, the other one
>> manages to reserve the BO and get all the way to the end of
>> ttm_bo_cleanup_refs_and_unlock, destroying bo->ttm_resv. Then
>> reservation_object_wait_timeout_rcu in the first process still accesses
>> memory which bo->ttm_resv pointed to => boom.
> 
> Good catch. But this means that just grabbing another reference before
> calling reservation_object_wait_timeout_rcu() and we should be on the
> safe side, shouldn't we ?

Grabbing a reference doesn't prevent ttm_bo_cleanup_refs_and_unlock in
another task from destroying bo->ttm_resv, does it?

I sent a patch for review which should fix it.


> Going to take a closer look tomorrow, today is a holiday here and I'm
> actually ill again once more :(

Take rest and get well soon!


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/ttm: Always and only destroy bo->ttm_resv in ttm_bo_release_list

2017-11-01 Thread Michel Dänzer
From: Michel Dänzer 

Fixes a use-after-free due to a race condition in
ttm_bo_cleanup_refs_and_unlock, which allows one task to reserve a BO
and destroy its ttm_resv while another task is waiting for it to signal
in reservation_object_wait_timeout_rcu.

Fixes: 0d2bd2ae045d "drm/ttm: fix memory leak while individualizing BOs"
Signed-off-by: Michel Dänzer 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 13 +++--
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 379ec41d2c69..a19a0ebf32ac 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -150,8 +150,7 @@ static void ttm_bo_release_list(struct kref *list_kref)
ttm_tt_destroy(bo->ttm);
atomic_dec(>glob->bo_count);
dma_fence_put(bo->moving);
-   if (bo->resv == >ttm_resv)
-   reservation_object_fini(>ttm_resv);
+   reservation_object_fini(>ttm_resv);
mutex_destroy(>wu_mutex);
if (bo->destroy)
bo->destroy(bo);
@@ -406,10 +405,8 @@ static int ttm_bo_individualize_resv(struct 
ttm_buffer_object *bo)
BUG_ON(!reservation_object_trylock(>ttm_resv));
 
r = reservation_object_copy_fences(>ttm_resv, bo->resv);
-   if (r) {
+   if (r)
reservation_object_unlock(>ttm_resv);
-   reservation_object_fini(>ttm_resv);
-   }
 
return r;
 }
@@ -457,10 +454,8 @@ static void ttm_bo_cleanup_refs_or_queue(struct 
ttm_buffer_object *bo)
if (reservation_object_test_signaled_rcu(>ttm_resv, true)) {
ttm_bo_del_from_lru(bo);
spin_unlock(>lru_lock);
-   if (bo->resv != >ttm_resv) {
+   if (bo->resv != >ttm_resv)
reservation_object_unlock(>ttm_resv);
-   reservation_object_fini(>ttm_resv);
-   }
 
ttm_bo_cleanup_memtype_use(bo);
return;
@@ -560,8 +555,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct 
ttm_buffer_object *bo,
}
 
ttm_bo_del_from_lru(bo);
-   if (!list_empty(>ddestroy) && (bo->resv != >ttm_resv))
-   reservation_object_fini(>ttm_resv);
list_del_init(>ddestroy);
kref_put(>list_kref, ttm_bo_ref_bug);
 
-- 
2.15.0.rc2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu:read VRAMLOST from gim

2017-11-01 Thread Alex Deucher
On Tue, Oct 31, 2017 at 11:45 PM, Monk Liu  wrote:
> Change-Id: I6a268903465004d6e8f65f135734094772b9f614
> Signed-off-by: Monk Liu 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   | 3 +++
>  3 files changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 068b56a..eccb3fa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2953,11 +2953,10 @@ static int amdgpu_reset_sriov(struct amdgpu_device 
> *adev, uint64_t *reset_flags,
> amdgpu_virt_release_full_gpu(adev, true);
>
> if (reset_flags) {
> -   /* will get vram_lost from GIM in future, now all
> -* reset request considered VRAM LOST
> -*/
> -   (*reset_flags) |= ~AMDGPU_RESET_INFO_VRAM_LOST;
> -   atomic_inc(>vram_lost_counter);
> +   if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) 
> {
> +   (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST;
> +   atomic_inc(>vram_lost_counter);
> +   }
>
> /* VF FLR or hotlink reset is always full-reset */
> (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index f791518..9cd030a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -332,6 +332,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device 
> *adev)
> pf2vf_ver = adev->virt.fw_reserve.p_pf2vf->version;
> AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, _size);
> AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, );
> +   AMDGPU_FW_VRAM_PF2VF_READ(adev, feature_flags, 
> >virt.gim_feature);
>
> /* pf2vf message must be in 4K */
> if (pf2vf_size > 0 && pf2vf_size < 4096) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index e3f78f5..f77d116 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -81,6 +81,8 @@ enum AMDGIM_FEATURE_FLAG {
> AMDGIM_FEATURE_ERROR_LOG_COLLECT = 0x1,
> /* GIM supports feature of loading uCodes */
> AMDGIM_FEATURE_GIM_LOAD_UCODES   = 0x2,
> +   /* VRAM LOST by GIM */
> +   AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4,
>  };
>
>  struct amdgim_pf2vf_info_header {
> @@ -246,6 +248,7 @@ struct amdgpu_virt {
> const struct amdgpu_virt_ops*ops;
> struct amdgpu_vf_error_buffer   vf_errors;
> struct amdgpu_virt_fw_reserve   fw_reserve;
> +   uint32_t gim_feature;
>  };
>
>  #define AMDGPU_CSA_SIZE(8 * 1024)
> --
> 2.7.4
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH V3 2/2] drm/amd/display: Read resource_straps from registers for DCE12

2017-11-01 Thread Harry Wentland
On 2017-11-01 10:24 AM, sunpeng...@amd.com wrote:
> From: "Leo (Sunpeng) Li" 
> 
> Now that the registers exist, assign them to the resource_straps struct.
> 
> v2: Fix indentation
> v3: Fix trailing whitespace and checkpatch warnings.
> 
> Signed-off-by: Leo (Sunpeng) Li 

Series is
Reviewed-by: Harry Wentland 

Harry

> ---
>  .../gpu/drm/amd/display/dc/dce120/dce120_resource.c   | 19 
> +--
>  1 file changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c 
> b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
> index 3ed28a8..5c48c22 100644
> --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
> @@ -501,12 +501,19 @@ static void read_dce_straps(
>   struct dc_context *ctx,
>   struct resource_straps *straps)
>  {
> - /* TODO: Registers are missing */
> - /*REG_GET_2(CC_DC_HDMI_STRAPS,
> - HDMI_DISABLE, >hdmi_disable,
> - AUDIO_STREAM_NUMBER, >audio_stream_number);
> -
> - REG_GET(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO, 
> >dc_pinstraps_audio);*/
> + uint32_t reg_val = dm_read_reg_soc15(ctx, mmCC_DC_MISC_STRAPS, 0);
> +
> + straps->audio_stream_number = get_reg_field_value(reg_val,
> +   CC_DC_MISC_STRAPS,
> +   AUDIO_STREAM_NUMBER);
> + straps->hdmi_disable = get_reg_field_value(reg_val,
> +CC_DC_MISC_STRAPS,
> +HDMI_DISABLE);
> +
> + reg_val = dm_read_reg_soc15(ctx, mmDC_PINSTRAPS, 0);
> + straps->dc_pinstraps_audio = get_reg_field_value(reg_val,
> +  DC_PINSTRAPS,
> +  DC_PINSTRAPS_AUDIO);
>  }
>  
>  static struct audio *create_audio(
> 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH V3 2/2] drm/amd/display: Read resource_straps from registers for DCE12

2017-11-01 Thread sunpeng.li
From: "Leo (Sunpeng) Li" 

Now that the registers exist, assign them to the resource_straps struct.

v2: Fix indentation
v3: Fix trailing whitespace and checkpatch warnings.

Signed-off-by: Leo (Sunpeng) Li 
---
 .../gpu/drm/amd/display/dc/dce120/dce120_resource.c   | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c 
b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
index 3ed28a8..5c48c22 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
@@ -501,12 +501,19 @@ static void read_dce_straps(
struct dc_context *ctx,
struct resource_straps *straps)
 {
-   /* TODO: Registers are missing */
-   /*REG_GET_2(CC_DC_HDMI_STRAPS,
-   HDMI_DISABLE, >hdmi_disable,
-   AUDIO_STREAM_NUMBER, >audio_stream_number);
-
-   REG_GET(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO, 
>dc_pinstraps_audio);*/
+   uint32_t reg_val = dm_read_reg_soc15(ctx, mmCC_DC_MISC_STRAPS, 0);
+
+   straps->audio_stream_number = get_reg_field_value(reg_val,
+ CC_DC_MISC_STRAPS,
+ AUDIO_STREAM_NUMBER);
+   straps->hdmi_disable = get_reg_field_value(reg_val,
+  CC_DC_MISC_STRAPS,
+  HDMI_DISABLE);
+
+   reg_val = dm_read_reg_soc15(ctx, mmDC_PINSTRAPS, 0);
+   straps->dc_pinstraps_audio = get_reg_field_value(reg_val,
+DC_PINSTRAPS,
+DC_PINSTRAPS_AUDIO);
 }
 
 static struct audio *create_audio(
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/powerplay: wrong control mode cause the fan spins faster unnecessarily

2017-11-01 Thread Alex Deucher
On Wed, Nov 1, 2017 at 5:24 AM, Evan Quan  wrote:
> The fan control mode can either be FDO_PWM_MODE_STATIC or 
> FDO_PWM_MODE_STATIC_RPM.
> Setting it as AMD_FAN_CTRL_AUTO will cause the fan spin faster wrongly.
>
> This can be reproduced by:
> '# cat /sys/class/hwmon/hwmon0/pwm1
>38
> '# cat /sys/class/hwmon/hwmon0/pwm1_enable
>2
> '# echo "2" > /sys/class/hwmon/hwmon0/pwm1_enable
> '# cat /sys/class/hwmon/hwmon0/pwm1
>122
> The fan speed get faster wronly even with its original mode echo back.
>
> Change-Id: I51b0586ac17e0d663925c87d548847b31c79d68a
> Signed-off-by: Evan Quan 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c 
> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> index 203ef10..4239b98 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> @@ -4234,7 +4234,7 @@ static void vega10_set_fan_control_mode(struct pp_hwmgr 
> *hwmgr, uint32_t mode)
> vega10_fan_ctrl_stop_smc_fan_control(hwmgr);
> break;
> case AMD_FAN_CTRL_AUTO:
> -   if (!vega10_fan_ctrl_set_static_mode(hwmgr, mode))
> +   if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl))
> vega10_fan_ctrl_start_smc_fan_control(hwmgr);
> break;
> default:
> --
> 2.7.4
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: allow harvesting check for Polaris VCE

2017-11-01 Thread Alex Deucher
On Wed, Nov 1, 2017 at 9:10 AM, Leo Liu  wrote:
> Signed-off-by: Leo Liu 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c 
> b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> index 90332f55cfba..cf81065e3c5a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> @@ -365,15 +365,10 @@ static unsigned vce_v3_0_get_harvest_config(struct 
> amdgpu_device *adev)
>  {
> u32 tmp;
>
> -   /* Fiji, Stoney, Polaris10, Polaris11, Polaris12 are single pipe */
> if ((adev->asic_type == CHIP_FIJI) ||
> -   (adev->asic_type == CHIP_STONEY) ||
> -   (adev->asic_type == CHIP_POLARIS10) ||
> -   (adev->asic_type == CHIP_POLARIS11) ||
> -   (adev->asic_type == CHIP_POLARIS12))
> +   (adev->asic_type == CHIP_STONEY))
> return AMDGPU_VCE_HARVEST_VCE1;
>
> -   /* Tonga and CZ are dual or single pipe */
> if (adev->flags & AMD_IS_APU)
> tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
>VCE_HARVEST_FUSE_MACRO__MASK) >>
> @@ -391,6 +386,11 @@ static unsigned vce_v3_0_get_harvest_config(struct 
> amdgpu_device *adev)
> case 3:
> return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
> default:
> +   if ((adev->asic_type == CHIP_POLARIS10) ||
> +   (adev->asic_type == CHIP_POLARIS11) ||
> +   (adev->asic_type == CHIP_POLARIS12))
> +   return AMDGPU_VCE_HARVEST_VCE1;
> +
> return 0;
> }
>  }
> --
> 2.14.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [Mesa-dev] [PATCH] amdgpu: Add R600_DEBUG flag to reserve VMID per ctx.

2017-11-01 Thread Marek Olšák
Yeah, it should be called when the winsys is created.

Marek

On Wed, Nov 1, 2017 at 9:49 AM, Christian König
 wrote:
> I'm not 100% sure that patch was correct.
>
> When is amdgpu_ctx_create() called? The VMID is reserved for the whole
> process, not just a context.
>
> Regards,
> Christian.
>
>
> Am 31.10.2017 um 16:57 schrieb Marek Olšák:
>>
>> I addressed the feedback and pushed the patch.
>>
>> Marek
>>
>> On Tue, Oct 31, 2017 at 4:50 PM, Michel Dänzer  wrote:
>>>
>>> On 31/10/17 04:40 PM, Andrey Grodzovsky wrote:

 Signed-off-by: Andrey Grodzovsky 
>>>
>>> [...]
>>>
 diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
 b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
 index 8f43e93..1155492 100644
 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
 +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
 @@ -256,6 +256,14 @@ static struct radeon_winsys_ctx
 *amdgpu_ctx_create(struct radeon_winsys *ws)
 goto error_create;
  }

 +   if (ctx->ws->reserve_vmid) {
 +r = amdgpu_vm_reserve_vmid(ctx->ctx, 0);
 +if (r) {
 + fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed.
 (%i)\n", r);
>>>
>>> This should say "amdgpu: amdgpu_vm_reserve_vmid failed. (%i)\n".
>>>
>>>
>>> --
>>> Earthling Michel Dänzer   |   http://www.amd.com
>>> Libre software enthusiast | Mesa and X developer
>>> ___
>>> mesa-dev mailing list
>>> mesa-...@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: allow harvesting check for Polaris VCE

2017-11-01 Thread Leo Liu
Signed-off-by: Leo Liu 
---
 drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 90332f55cfba..cf81065e3c5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -365,15 +365,10 @@ static unsigned vce_v3_0_get_harvest_config(struct 
amdgpu_device *adev)
 {
u32 tmp;
 
-   /* Fiji, Stoney, Polaris10, Polaris11, Polaris12 are single pipe */
if ((adev->asic_type == CHIP_FIJI) ||
-   (adev->asic_type == CHIP_STONEY) ||
-   (adev->asic_type == CHIP_POLARIS10) ||
-   (adev->asic_type == CHIP_POLARIS11) ||
-   (adev->asic_type == CHIP_POLARIS12))
+   (adev->asic_type == CHIP_STONEY))
return AMDGPU_VCE_HARVEST_VCE1;
 
-   /* Tonga and CZ are dual or single pipe */
if (adev->flags & AMD_IS_APU)
tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
   VCE_HARVEST_FUSE_MACRO__MASK) >>
@@ -391,6 +386,11 @@ static unsigned vce_v3_0_get_harvest_config(struct 
amdgpu_device *adev)
case 3:
return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
default:
+   if ((adev->asic_type == CHIP_POLARIS10) ||
+   (adev->asic_type == CHIP_POLARIS11) ||
+   (adev->asic_type == CHIP_POLARIS12))
+   return AMDGPU_VCE_HARVEST_VCE1;
+
return 0;
}
 }
-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: return -ENOENT from uvd 6.0 early init for harvesting

2017-11-01 Thread Leo Liu
Signed-off-by: Leo Liu 
---
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 7e4de3e6950f..0c01825a8b9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -371,6 +371,10 @@ static int uvd_v6_0_early_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+   if (!(adev->flags & AMD_IS_APU) &&
+   (RREG32_SMC(ixCC_HARVEST_FUSES) & 
CC_HARVEST_FUSES__UVD_DISABLE_MASK))
+   return -ENOENT;
+
uvd_v6_0_set_ring_funcs(adev);
 
if (uvd_v6_0_enc_support(adev)) {
-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: refine SR-IOV firmware VRAM reservation to protect data

2017-11-01 Thread Horace Chen
The previous solution will create a zero buffer on the system
domain and then move the zeroes to the VRAM. This will break the
original data on the VRAM.

Refine the code to create bo on VRAM domain directly and then remove
and re-create mem node to the exact position before bo_pin. This can
avoid breaking the data and will not cause eviction.

Signed-off-by: Horace Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 068b56a..8b33adf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -680,8 +680,12 @@ void amdgpu_fw_reserve_vram_fini(struct amdgpu_device 
*adev)
 int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev)
 {
int r = 0;
+   int i;
u64 gpu_addr;
u64 vram_size = adev->mc.visible_vram_size;
+   u64 offset = adev->fw_vram_usage.start_offset;
+   u64 size = adev->fw_vram_usage.size;
+   struct amdgpu_bo *bo;
 
adev->fw_vram_usage.va = NULL;
adev->fw_vram_usage.reserved_bo = NULL;
@@ -690,7 +694,7 @@ int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev)
adev->fw_vram_usage.size <= vram_size) {
 
r = amdgpu_bo_create(adev, adev->fw_vram_usage.size,
-   PAGE_SIZE, true, 0,
+   PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0,
>fw_vram_usage.reserved_bo);
@@ -700,6 +704,23 @@ int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev)
r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false);
if (r)
goto error_reserve;
+
+   /* remove the original mem node and create a new one at the
+* request position
+*/
+   bo = adev->fw_vram_usage.reserved_bo;
+   offset = ALIGN(offset, PAGE_SIZE);
+   for (i = 0; i < bo->placement.num_placement; ++i) {
+   bo->placements[i].fpfn = offset >> PAGE_SHIFT;
+   bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
+   }
+
+   ttm_bo_mem_put(>tbo, >tbo.mem);
+   r = ttm_bo_mem_space(>tbo, >placement, >tbo.mem,
+false, false);
+   if (r)
+   goto error_pin;
+
r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo,
AMDGPU_GEM_DOMAIN_VRAM,
adev->fw_vram_usage.start_offset,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/3] drm/amdgpu: Add interface to protect VRAM at exact position

2017-11-01 Thread Liu, Monk
+   if (((domain & AMDGPU_GEM_DOMAIN_VRAM) ||
+   (domain & AMDGPU_GEM_DOMAIN_GTT)) && offset != ~0) {

Why not make it simple? Like:

if ((domain & (AMDGPU_GEM_DOMAIN_VRAM|AMDGPU_GEM_DOMAIN_GTT)) && offset !=~0)

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Horace Chen
Sent: 2017年11月1日 16:58
To: amd-gfx@lists.freedesktop.org
Cc: Chen, Horace 
Subject: [PATCH 1/3] drm/amdgpu: Add interface to protect VRAM at exact position

The existing method to reserve specified VRAM is to create a bo on system 
domain then pin it to VRAM. But in this process the existing data on the VRAM 
will be broken, because ttm will allocate a zero buffer on system domain then 
copy the zeroes to VRAM.

Actually SRIOV need to reserve VRAM to protect data at exact position.

So here add a new interface to create bo at the exact position directly and 
then pin it immediately to avoid eviction and avoid breaking the existing data 
on the VRAM.

Signed-off-by: Horace Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 84 -- 
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  4 ++
 2 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index e44b880..7108dc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -281,7 +281,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 
*gpu_addr,
*cpu_addr = NULL;
 }
 
-static int amdgpu_bo_do_create(struct amdgpu_device *adev,
+static int amdgpu_bo_do_create(struct amdgpu_device *adev, u64 offset,
   unsigned long size, int byte_align,
   bool kernel, u32 domain, u64 flags,
   struct sg_table *sg,
@@ -295,6 +295,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
u64 initial_bytes_moved, bytes_moved;
size_t acc_size;
int r;
+   int i;
 
page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
size = ALIGN(size, PAGE_SIZE);
@@ -364,6 +365,21 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
bo->tbo.bdev = >mman.bdev;
amdgpu_ttm_placement_from_domain(bo, domain);
 
+   /*
+* if offset is not ~0, it means the offset of this bo is restricted.
+* modify the placement to create bo at the exact place.
+*/
+   if (((domain & AMDGPU_GEM_DOMAIN_VRAM) ||
+   (domain & AMDGPU_GEM_DOMAIN_GTT)) && offset != ~0) {
+   offset = ALIGN(offset, PAGE_SIZE);
+   for (i = 0; i < bo->placement.num_placement; ++i) {
+   bo->placements[i].fpfn =
+   offset >> PAGE_SHIFT;
+   bo->placements[i].lpfn =
+   (offset + size) >> PAGE_SHIFT;
+   }
+   }
+
initial_bytes_moved = atomic64_read(>num_bytes_moved);
/* Kernel allocation are uninterruptible */
r = ttm_bo_init_reserved(>mman.bdev, >tbo, size, type, @@ 
-416,6 +432,68 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
return r;
 }
 
+/**
+ * amdgpu_bo_create_vram_restricted_kernel -
+ *create BO at the specified place on the VRAM.
+ *
+ * @adev: amdgpu device object
+ * @offset: start offset of the new BO
+ * @size: size for the new BO
+ * @byte_align: alignment for the new BO
+ * @flags: addition flags for the BO
+ * @bo_ptr: resulting BO
+ * @gpu_addr: GPU addr of the pinned BO
+ * @cpu_addr: optional CPU address mapping
+ *
+ * Allocates and pins a BO at the specified place on the VRAM.
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_vram_restricted_kernel(struct amdgpu_device *adev,
+u64 offset, unsigned long size, int byte_align,
+u64 flags, struct amdgpu_bo **bo_ptr,
+u64 *gpu_addr, void **cpu_addr)
+{
+   int r;
+   /* specified memory must be in contiguous*/
+   flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+
+   r = amdgpu_bo_do_create(adev, offset, size, byte_align, true,
+   AMDGPU_GEM_DOMAIN_VRAM, flags, NULL, NULL, 0, bo_ptr);
+   if (r)
+   return r;
+
+   r = amdgpu_bo_reserve(*bo_ptr, false);
+   if (r)
+   goto error_reserve;
+   r = amdgpu_bo_pin_restricted(*bo_ptr,
+   AMDGPU_GEM_DOMAIN_VRAM, offset, (offset + size),
+   gpu_addr);
+   if (r)
+   goto error_pin;
+   if (cpu_addr) {
+   r = amdgpu_bo_kmap(*bo_ptr,
+   cpu_addr);
+   if (r)
+   goto error_kmap;
+   }
+
+   amdgpu_bo_unreserve(*bo_ptr);
+
+   return r;
+error_kmap:
+   amdgpu_bo_unpin(*bo_ptr);

RE: [PATCH 3/3] drm/amdgpu: pick SR-IOV fw reservation information in atomfirmware

2017-11-01 Thread Liu, Monk
This one is already submitted

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Horace Chen
Sent: 2017年11月1日 16:58
To: amd-gfx@lists.freedesktop.org
Cc: Chen, Horace 
Subject: [PATCH 3/3] drm/amdgpu: pick SR-IOV fw reservation information in 
atomfirmware

SR-IOV need to get start offset and size from firmware for its vram 
reservation. This logic had been add to the atombios code path.

As the current branch will run atomfirmware by default, add same logic to 
atomfirmware code path.

Signed-off-by: Horace Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index f9ffe8e..8509907 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -73,6 +73,8 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct 
amdgpu_device *adev)
vram_usagebyfirmware);
uint16_t data_offset;
int usage_bytes = 0;
+   u64 start_addr;
+   u64 size;
 
if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, 
_offset)) {
struct vram_usagebyfirmware_v2_1 *firmware_usage = @@ -83,6 
+85,18 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device 
*adev)
  le16_to_cpu(firmware_usage->used_by_firmware_in_kb),
  le16_to_cpu(firmware_usage->used_by_driver_in_kb));
 
+   start_addr = firmware_usage->start_address_in_kb;
+   size = firmware_usage->used_by_firmware_in_kb;
+
+   if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
+   (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION 
<<
+   ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
+   /* Firmware request VRAM reservation for SR-IOV */
+   adev->fw_vram_usage.start_offset = (start_addr &
+   (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+   adev->fw_vram_usage.size = size << 10;
+   /* Use the default scratch size */
+   }
usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) 
* 1024;
}
ctx->scratch_size_bytes = 0;
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 8/8] drm/amdkfd: Minor cleanups v2

2017-11-01 Thread Oded Gabbay
On Thu, Oct 26, 2017 at 1:41 AM, Felix Kuehling  wrote:
> These were missed previously when rebasing changes for upstreaming.
>
> v2: Remove redundant sched_policy conditions
>
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 10 --
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 ++
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  2 +-
>  3 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index e2fc4c5..e202921 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -389,12 +389,11 @@ static int update_queue(struct device_queue_manager 
> *dqm, struct queue *q)
> if (sched_policy != KFD_SCHED_POLICY_NO_HWS) {
> retval = unmap_queues_cpsch(dqm,
> KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
> -   if (retval != 0) {
> +   if (retval) {
> pr_err("unmap queue failed\n");
> goto out_unlock;
> }
> -   } else if (sched_policy == KFD_SCHED_POLICY_NO_HWS &&
> -  prev_active &&
> +   } else if (prev_active &&
>(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
> q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
> retval = mqd->destroy_mqd(mqd, q->mqd,
> @@ -421,8 +420,7 @@ static int update_queue(struct device_queue_manager *dqm, 
> struct queue *q)
>
> if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
> retval = map_queues_cpsch(dqm);
> -   else if (sched_policy == KFD_SCHED_POLICY_NO_HWS &&
> -q->properties.is_active &&
> +   else if (q->properties.is_active &&
>  (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
>   q->properties.type == KFD_QUEUE_TYPE_SDMA))
> retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
> @@ -832,7 +830,7 @@ static int create_queue_cpsch(struct device_queue_manager 
> *dqm, struct queue *q,
>
> if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
> retval = allocate_sdma_queue(dqm, >sdma_id);
> -   if (retval != 0)
> +   if (retval)
> goto out;
> q->properties.sdma_queue_id =
> q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 3b0cc20..9e27c16 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -507,6 +507,8 @@ struct kfd_process {
>  * In any process, the thread that started main() is the lead
>  * thread and outlives the rest.
>  * It is here because amd_iommu_bind_pasid wants a task_struct.
> +* It can also be used for safely getting a reference to the
> +* mm_struct of the process.
>  */
> struct task_struct *lead_thread;
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 6caf6df..674b788 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -421,7 +421,7 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev)
> err = amd_iommu_bind_pasid(dev->pdev, p->pasid,
> p->lead_thread);
> if (err < 0) {
> -   pr_err("unexpected pasid %d binding failure\n",
> +   pr_err("Unexpected pasid %d binding failure\n",
> p->pasid);
> mutex_unlock(>mutex);
> break;
> --
> 2.7.4
>
This patch is:
Reviewed-by: Oded Gabbay 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 7/8] drm/amdkfd: Update queue_count before mapping queues

2017-11-01 Thread Oded Gabbay
On Thu, Oct 26, 2017 at 1:41 AM, Felix Kuehling  wrote:
> map_queues_cpsch uses the queue_count to decide whether to upload
> a new runlist. So update the counter before calling it.
>
> Signed-off-by: Felix Kuehling 
> ---
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c| 20 
> +++-
>  1 file changed, 11 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 45b98dd..e2fc4c5 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -408,6 +408,17 @@ static int update_queue(struct device_queue_manager 
> *dqm, struct queue *q)
>
> retval = mqd->update_mqd(mqd, q->mqd, >properties);
>
> +   /*
> +* check active state vs. the previous state and modify
> +* counter accordingly. map_queues_cpsch uses the
> +* dqm->queue_count to determine whether a new runlist must be
> +* uploaded.
> +*/
> +   if (q->properties.is_active && !prev_active)
> +   dqm->queue_count++;
> +   else if (!q->properties.is_active && prev_active)
> +   dqm->queue_count--;
> +
> if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
> retval = map_queues_cpsch(dqm);
> else if (sched_policy == KFD_SCHED_POLICY_NO_HWS &&
> @@ -417,15 +428,6 @@ static int update_queue(struct device_queue_manager 
> *dqm, struct queue *q)
> retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
>>properties, q->process->mm);
>
> -   /*
> -* check active state vs. the previous state
> -* and modify counter accordingly
> -*/
> -   if (q->properties.is_active && !prev_active)
> -   dqm->queue_count++;
> -   else if (!q->properties.is_active && prev_active)
> -   dqm->queue_count--;
> -
>  out_unlock:
> mutex_unlock(>lock);
> return retval;
> --
> 2.7.4
>
This patch is:
Reviewed-by: Oded Gabbay 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 6/8] drm/amdkfd: Cleanup DQM ASIC-specific ops

2017-11-01 Thread Oded Gabbay
On Thu, Oct 26, 2017 at 1:41 AM, Felix Kuehling  wrote:
> From: Yong Zhao 
>
> Remove empty initialize function.
>
> Rename register_process to update_qpd to avoid confusion with the
> non-ASIC-specific register_process.
>
> Shorten ops_asic_specific to asic_ops.
>
> Signed-off-by: Yong Zhao 
> Signed-off-by: Felix Kuehling 
> ---
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c| 19 +++
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h| 11 ++-
>  .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c| 20 
> +++-
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 20 
> +++-
>  4 files changed, 27 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index da3b743..45b98dd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -467,7 +467,7 @@ static int register_process(struct device_queue_manager 
> *dqm,
> mutex_lock(>lock);
> list_add(>list, >queues);
>
> -   retval = dqm->ops_asic_specific.register_process(dqm, qpd);
> +   retval = dqm->asic_ops.update_qpd(dqm, qpd);
>
> dqm->processes_count++;
>
> @@ -629,7 +629,7 @@ static int create_sdma_queue_nocpsch(struct 
> device_queue_manager *dqm,
> pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
> pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
>
> -   dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
> +   dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
> retval = mqd->init_mqd(mqd, >mqd, >mqd_mem_obj,
> >gart_mqd_addr, >properties);
> if (retval)
> @@ -696,8 +696,6 @@ static int set_sched_resources(struct 
> device_queue_manager *dqm)
>
>  static int initialize_cpsch(struct device_queue_manager *dqm)
>  {
> -   int retval;
> -
> pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
>
> mutex_init(>lock);
> @@ -706,11 +704,8 @@ static int initialize_cpsch(struct device_queue_manager 
> *dqm)
> dqm->sdma_queue_count = 0;
> dqm->active_runlist = false;
> dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
> -   retval = dqm->ops_asic_specific.initialize(dqm);
> -   if (retval)
> -   mutex_destroy(>lock);
>
> -   return retval;
> +   return 0;
>  }
>
>  static int start_cpsch(struct device_queue_manager *dqm)
> @@ -850,7 +845,7 @@ static int create_queue_cpsch(struct device_queue_manager 
> *dqm, struct queue *q,
> goto out;
> }
>
> -   dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
> +   dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
> retval = mqd->init_mqd(mqd, >mqd, >mqd_mem_obj,
> >gart_mqd_addr, >properties);
> if (retval)
> @@ -1095,7 +1090,7 @@ static bool set_cache_memory_policy(struct 
> device_queue_manager *dqm,
> qpd->sh_mem_ape1_limit = limit >> 16;
> }
>
> -   retval = dqm->ops_asic_specific.set_cache_memory_policy(
> +   retval = dqm->asic_ops.set_cache_memory_policy(
> dqm,
> qpd,
> default_policy,
> @@ -1270,11 +1265,11 @@ struct device_queue_manager 
> *device_queue_manager_init(struct kfd_dev *dev)
>
> switch (dev->device_info->asic_family) {
> case CHIP_CARRIZO:
> -   device_queue_manager_init_vi(>ops_asic_specific);
> +   device_queue_manager_init_vi(>asic_ops);
> break;
>
> case CHIP_KAVERI:
> -   device_queue_manager_init_cik(>ops_asic_specific);
> +   device_queue_manager_init_cik(>asic_ops);
> break;
> default:
> WARN(1, "Unexpected ASIC family %u",
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index 31c2b1f..5b77cb6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -128,9 +128,8 @@ struct device_queue_manager_ops {
>  };
>
>  struct device_queue_manager_asic_ops {
> -   int (*register_process)(struct device_queue_manager *dqm,
> +   int (*update_qpd)(struct device_queue_manager *dqm,
> struct qcm_process_device *qpd);
> -   int (*initialize)(struct device_queue_manager *dqm);
> bool(*set_cache_memory_policy)(struct device_queue_manager *dqm,
>struct qcm_process_device *qpd,
>enum cache_policy default_policy,
> @@ -156,7 +155,7 @@ struct 

Re: [PATCH 5/8] drm/amdkfd: Register/Deregister process on qpd resolution

2017-11-01 Thread Oded Gabbay
On Thu, Oct 26, 2017 at 1:41 AM, Felix Kuehling  wrote:
> From: Ben Goz 
>
> Process registration needs to happen on each device. So use per-device
> queue lists to determine when to register/deregister the process.
>
> Signed-off-by: Ben Goz 
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 9 ++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index 5129dc1..2bec902 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -177,7 +177,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
> if (retval != 0)
> return retval;
>
> -   if (list_empty(>queues)) {
> +   if (list_empty(>qpd.queues_list) &&
> +   list_empty(>qpd.priv_queue_list)) {
> pdd->qpd.pqm = pqm;
> dev->dqm->ops.register_process(dev->dqm, >qpd);
> }
> @@ -248,7 +249,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
>  err_allocate_pqn:
> /* check if queues list is empty unregister process from device */
> clear_bit(*qid, pqm->queue_slot_bitmap);
> -   if (list_empty(>queues))
> +   if (list_empty(>qpd.queues_list) &&
> +   list_empty(>qpd.priv_queue_list))
> dev->dqm->ops.unregister_process(dev->dqm, >qpd);
> return retval;
>  }
> @@ -302,7 +304,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, 
> unsigned int qid)
> kfree(pqn);
> clear_bit(qid, pqm->queue_slot_bitmap);
>
> -   if (list_empty(>queues))
> +   if (list_empty(>qpd.queues_list) &&
> +   list_empty(>qpd.priv_queue_list))
> dqm->ops.unregister_process(dqm, >qpd);
>
> return retval;
> --
> 2.7.4
>
This patch is:
Reviewed-by: Oded Gabbay 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 4/8] drm/amdkfd: Fix debug unregister procedure on process termination

2017-11-01 Thread Oded Gabbay
On Thu, Oct 26, 2017 at 1:41 AM, Felix Kuehling  wrote:
> From: Yair Shachar 
>
> Take the dbgmgr lock and unregister before destroying the debug manager.
> Do this before destroying the queues.
>
> Signed-off-by: Yair Shachar 
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 37 
> +++-
>  1 file changed, 27 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 5d93a5c..6caf6df 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -229,17 +229,26 @@ static void kfd_process_notifier_release(struct 
> mmu_notifier *mn,
>
> mutex_lock(>mutex);
>
> +   /* Iterate over all process device data structures and if the
> +* pdd is in debug mode, we should first force unregistration,
> +* then we will be able to destroy the queues
> +*/
> +   list_for_each_entry(pdd, >per_device_data, per_device_list) {
> +   struct kfd_dev *dev = pdd->dev;
> +
> +   mutex_lock(kfd_get_dbgmgr_mutex());
> +   if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
> +   if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
> +   kfd_dbgmgr_destroy(dev->dbgmgr);
> +   dev->dbgmgr = NULL;
> +   }
> +   }
> +   mutex_unlock(kfd_get_dbgmgr_mutex());
> +   }
> +
> kfd_process_dequeue_from_all_devices(p);
> pqm_uninit(>pqm);
>
> -   /* Iterate over all process device data structure and check
> -* if we should delete debug managers
> -*/
> -   list_for_each_entry(pdd, >per_device_data, per_device_list)
> -   if ((pdd->dev->dbgmgr) &&
> -   (pdd->dev->dbgmgr->pasid == p->pasid))
> -   kfd_dbgmgr_destroy(pdd->dev->dbgmgr);
> -
> mutex_unlock(>mutex);
>
> /*
> @@ -468,8 +477,16 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev 
> *dev, unsigned int pasid)
>
> pr_debug("Unbinding process %d from IOMMU\n", pasid);
>
> -   if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
> -   kfd_dbgmgr_destroy(dev->dbgmgr);
> +   mutex_lock(kfd_get_dbgmgr_mutex());
> +
> +   if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
> +   if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
> +   kfd_dbgmgr_destroy(dev->dbgmgr);
> +   dev->dbgmgr = NULL;
> +   }
> +   }
> +
> +   mutex_unlock(kfd_get_dbgmgr_mutex());
>
> pdd = kfd_get_process_device_data(dev, p);
> if (pdd)
> --
> 2.7.4
>
The patch is fine but to prevent theoretical deadlocks, I suggest to
add to this patch a change in the order of locks at
kfd_ioctl_dbg_register(), so that p->mutex is taken *before* dbgmgr
mutex

With that change, this patch is:
Reviewed-by: Oded Gabbay 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/amdgpu:cleanup deprecated gpu reset logic

2017-11-01 Thread Liu, Monk
Sounds feasible, only this step looks need some investigation on hardware side:

>in gfx_v9_0_priv_reg_irq() (and all the other IRQ handlers as well) we add 
>functionality to figure out the ring/scheduler which caused the illegal 
>operation.

e.g. :

gfx_v9_priv_reg_irq(struct amdgpu_device *adev, strut amdgpu_irq_src *source, 
struct amdgpu_iv_entry *entry){
  //in this function, need to figure out which ring cause this error from 
source/entry parameter
}



-Original Message-
From: Koenig, Christian 
Sent: 2017年11月1日 16:42
To: Liu, Monk ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/amdgpu:cleanup deprecated gpu reset logic

Am 01.11.2017 um 04:29 schrieb Liu, Monk:
> The thing is triggering gpu_recover() in irq routine give you NULL for the 
> @bad/job parameter, so gpu_recover() actually did nothing meaningful, it just 
> repeat scheduling un-signaled jobs again and again, and finally your GPU is 
> stuck with infinite recovering 

Yeah, completely agree that this isn't a good implementation. But we need to 
somehow do better than that.

> here is my initial thought:
> In " gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry 
> *entry)" routine:
> For lockup_timeout==0 case: we put a new parameter "@sched" to 
> amd_gpu_recover(), that way although we don't have @bad/job, but with @sched 
> we can still find out the pending job on his scheduler, But that make code 
> ugly, cuz we need to change amdgpu_gpu_recover(adev, job) to 
> amdgpu_gpu_recover(adev, job, sched). Anyway need to find a way to tell 
> gpu_recover() at least which ring has problem.
>
> For lockup_timeout!=0 case: did nothing and just return since TDR will 
> correct this error and do recover() gracefully.

I think waiting for the TDR will take to long. How about this instead:

1. In gfx_v9_0_priv_reg_irq() (and all the other IRQ handlers as well) we add 
functionality to figure out the ring/scheduler which caused the illegal 
operation.

2. Then we add a new function amd_sched_job_fault() which gets the scheduler 
which had a fault as parameter.

3. amd_sched_job_fault() then grabs job_list_lock and takes the first job from 
the ring_mirror_list.

4. We call cancel_delayed_work() to cancel the timeout of the job.

5. If cancel_delayed_work() returns true (which means the TDR isn't already 
running anyway) we call schedule_delayed_work() with a zero timeout.

This way the actual timeout value is truncated to zero and we run the recovery 
immediately just in the same way as it would when we have waited.

Should be trivial to implement actually. What do you think?

Regards,
Christian.

>
>
>
> -Original Message-
> From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
> Sent: 2017年10月31日 23:01
> To: Liu, Monk ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/2] drm/amdgpu:cleanup deprecated gpu reset logic
>
> Am 31.10.2017 um 12:55 schrieb Monk Liu:
>> trigger gpu reset/recovery from illegle instruction IRQ is deprecated 
>> long time ago, we already switch to recover gpu by TDR triggering.
>>
>> now please set lockup_timeout to non-zero value in driver loading to 
>> enable TDR.
> The patch is ok, but NAK to the general approach. We should have illegal 
> instruction/access alongside the timeout.
>
> But instead of trying to trigger the reset directly inform the scheduler that 
> the we detected a problem on the engine. The scheduler can then cancel the 
> timeout and do the appropriate things.
>
> This patch would be ok if you install this new functionality directly after 
> removing the old (and broken) one.
>
> Regards,
> Christian.
>
>> Change-Id: I45a576a97fd9859e1098e785ce857c2cf5adfba5
>> Signed-off-by: Monk Liu 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 -
>>drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 21 -
>>drivers/gpu/drm/amd/amdgpu/cik_sdma.c   |  1 -
>>drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c   |  2 --
>>drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   |  3 ---
>>drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   |  2 --
>>drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   |  2 --
>>drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c  |  1 -
>>drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c  |  1 -
>>drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  |  1 -
>>drivers/gpu/drm/amd/amdgpu/si_dma.c |  1 -
>>11 files changed, 36 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 6e89be5..b3453e3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -1459,7 +1459,6 @@ struct amdgpu_device {
>>  boolshutdown;
>>  boolneed_dma32;
>>  boolaccel_working;
>> -struct work_struct  reset_work;
>>  struct notifier_block   acpi_nb;
>>  struct amdgpu_i2c_chan

Re: [PATCH 3/8] drm/amdkfd: Avoid calling amd_iommu_unbind_pasid() when suspending

2017-11-01 Thread Oded Gabbay
On Thu, Oct 26, 2017 at 1:41 AM, Felix Kuehling  wrote:
> From: Yong Zhao 
>
> When kfd suspending on APU, we do not need to call
> amd_iommu_unbind_pasid(), because pasid will be unbound automatically
> when power goes off.
>
> On the other hand, calling amd_iommu_unbind_pasid() will trigger
> kfd_process_iommu_unbind_callback() if the process is not terminating.
> By design, kfd_process_iommu_unbind_callback() should only be called
> for process terminating. So we would rather not to call
> amd_iommu_unbind_pasid() when suspending.
>
> Signed-off-by: Yong Zhao 
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 14 +-
>  1 file changed, 5 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index f871855..5d93a5c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -428,29 +428,25 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev)
>  }
>
>  /*
> - * Temporarily unbind currently bound processes from the device and
> - * mark them as PDD_BOUND_SUSPENDED. These processes will be restored
> - * to PDD_BOUND state in kfd_bind_processes_to_device.
> + * Mark currently bound processes as PDD_BOUND_SUSPENDED. These
> + * processes will be restored to PDD_BOUND state in
> + * kfd_bind_processes_to_device.
>   */
>  void kfd_unbind_processes_from_device(struct kfd_dev *dev)
>  {
> struct kfd_process_device *pdd;
> struct kfd_process *p;
> -   unsigned int temp, temp_bound, temp_pasid;
> +   unsigned int temp;
>
> int idx = srcu_read_lock(_processes_srcu);
>
> hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
> mutex_lock(>mutex);
> pdd = kfd_get_process_device_data(dev, p);
> -   temp_bound = pdd->bound;
> -   temp_pasid = p->pasid;
> +
> if (pdd->bound == PDD_BOUND)
> pdd->bound = PDD_BOUND_SUSPENDED;
> mutex_unlock(>mutex);
> -
> -   if (temp_bound == PDD_BOUND)
> -   amd_iommu_unbind_pasid(dev->pdev, temp_pasid);
> }
>
> srcu_read_unlock(_processes_srcu, idx);
> --
> 2.7.4
>
This patch is:
Reviewed-by: Oded Gabbay 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/8] drm/amdkfd: Disable CP/SDMA ring/doorbell in MQD

2017-11-01 Thread Oded Gabbay
On Thu, Oct 26, 2017 at 1:41 AM, Felix Kuehling  wrote:
> From: Jay Cornwall 
>
> The MQD represents an inactive context and should not have ring or
> doorbell enable bits set. Doing so interferes with HWS which streams
> the MQD onto the HQD. If enable bits are set this activates the ring
> or doorbell before the HQD is fully configured.
>
> Signed-off-by: Jay Cornwall 
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 34 
> +++-
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  |  7 ++---
>  2 files changed, 11 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> index 44ffd23..4859d26 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> @@ -189,12 +189,9 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
> if (q->format == KFD_QUEUE_FORMAT_AQL)
> m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
>
> -   q->is_active = false;
> -   if (q->queue_size > 0 &&
> +   q->is_active = (q->queue_size > 0 &&
> q->queue_address != 0 &&
> -   q->queue_percent > 0) {
> -   q->is_active = true;
> -   }
> +   q->queue_percent > 0);
>
> return 0;
>  }
> @@ -215,24 +212,17 @@ static int update_mqd_sdma(struct mqd_manager *mm, void 
> *mqd,
> m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8);
> m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
> m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
> -   m->sdma_rlc_doorbell = q->doorbell_off <<
> -   SDMA0_RLC0_DOORBELL__OFFSET__SHIFT |
> -   1 << SDMA0_RLC0_DOORBELL__ENABLE__SHIFT;
> +   m->sdma_rlc_doorbell =
> +   q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
>
> m->sdma_rlc_virtual_addr = q->sdma_vm_addr;
>
> m->sdma_engine_id = q->sdma_engine_id;
> m->sdma_queue_id = q->sdma_queue_id;
>
> -   q->is_active = false;
> -   if (q->queue_size > 0 &&
> +   q->is_active = (q->queue_size > 0 &&
> q->queue_address != 0 &&
> -   q->queue_percent > 0) {
> -   m->sdma_rlc_rb_cntl |=
> -   1 << SDMA0_RLC0_RB_CNTL__RB_ENABLE__SHIFT;
> -
> -   q->is_active = true;
> -   }
> +   q->queue_percent > 0);
>
> return 0;
>  }
> @@ -359,19 +349,13 @@ static int update_mqd_hiq(struct mqd_manager *mm, void 
> *mqd,
> m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
> m->cp_hqd_pq_rptr_report_addr_lo = 
> lower_32_bits((uint64_t)q->read_ptr);
> m->cp_hqd_pq_rptr_report_addr_hi = 
> upper_32_bits((uint64_t)q->read_ptr);
> -   m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
> -   DOORBELL_OFFSET(q->doorbell_off);
> +   m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off);
>
> m->cp_hqd_vmid = q->vmid;
>
> -   m->cp_hqd_active = 0;
> -   q->is_active = false;
> -   if (q->queue_size > 0 &&
> +   q->is_active = (q->queue_size > 0 &&
> q->queue_address != 0 &&
> -   q->queue_percent > 0) {
> -   m->cp_hqd_active = 1;
> -   q->is_active = true;
> -   }
> +   q->queue_percent > 0);
>
> return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> index 73cbfe1..4ea854f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> @@ -163,12 +163,9 @@ static int __update_mqd(struct mqd_manager *mm, void 
> *mqd,
> 2 << 
> CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
> }
>
> -   q->is_active = false;
> -   if (q->queue_size > 0 &&
> +   q->is_active = (q->queue_size > 0 &&
> q->queue_address != 0 &&
> -   q->queue_percent > 0) {
> -   q->is_active = true;
> -   }
> +   q->queue_percent > 0);
>
> return 0;
>  }
> --
> 2.7.4
>
This patch is:
Reviewed-by: Oded Gabbay 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/8] drm/amdkfd: Clean up the data structure in kfd_process

2017-11-01 Thread Oded Gabbay
On Thu, Oct 26, 2017 at 1:41 AM, Felix Kuehling  wrote:
> From: Yong Zhao 
>
> A list of per-process queues is maintained in the
> kfd_process_queue_manager, so the queues array in kfd_process is
> redundant and in fact unused.
>
> Signed-off-by: Yong Zhao 
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  6 --
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 18 --
>  2 files changed, 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index bf29021..3b0cc20 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -527,12 +527,6 @@ struct kfd_process {
>
> struct process_queue_manager pqm;
>
> -   /* The process's queues. */
> -   size_t queue_array_size;
> -
> -   /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */
> -   struct kfd_queue **queues;
> -
> /*Is the user space process 32 bit?*/
> bool is_32bit_user_mode;
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 21d27e5..f871855 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -35,13 +35,6 @@ struct mm_struct;
>  #include "kfd_dbgmgr.h"
>
>  /*
> - * Initial size for the array of queues.
> - * The allocated size is doubled each time
> - * it is exceeded up to MAX_PROCESS_QUEUES.
> - */
> -#define INITIAL_QUEUE_ARRAY_SIZE 16
> -
> -/*
>   * List of struct kfd_process (field kfd_process).
>   * Unique/indexed by mm_struct*
>   */
> @@ -187,8 +180,6 @@ static void kfd_process_wq_release(struct work_struct 
> *work)
>
> mutex_destroy(>mutex);
>
> -   kfree(p->queues);
> -
> kfree(p);
>
> kfree(work);
> @@ -275,11 +266,6 @@ static struct kfd_process *create_process(const struct 
> task_struct *thread)
> if (!process)
> goto err_alloc_process;
>
> -   process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE,
> -   sizeof(process->queues[0]), 
> GFP_KERNEL);
> -   if (!process->queues)
> -   goto err_alloc_queues;
> -
> process->pasid = kfd_pasid_alloc();
> if (process->pasid == 0)
> goto err_alloc_pasid;
> @@ -302,8 +288,6 @@ static struct kfd_process *create_process(const struct 
> task_struct *thread)
>
> process->lead_thread = thread->group_leader;
>
> -   process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE;
> -
> INIT_LIST_HEAD(>per_device_data);
>
> kfd_event_init_process(process);
> @@ -332,8 +316,6 @@ static struct kfd_process *create_process(const struct 
> task_struct *thread)
>  err_alloc_doorbells:
> kfd_pasid_free(process->pasid);
>  err_alloc_pasid:
> -   kfree(process->queues);
> -err_alloc_queues:
> kfree(process);
>  err_alloc_process:
> return ERR_PTR(err);
> --
> 2.7.4
>
This patch is:
Reviewed-by: Oded Gabbay 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/3] drm/amdgpu: pick SR-IOV fw reservation information in atomfirmware

2017-11-01 Thread Horace Chen
SR-IOV need to get start offset and size from firmware for its vram
reservation. This logic had been add to the atombios code path.

As the current branch will run atomfirmware by default, add same
logic to atomfirmware code path.

Signed-off-by: Horace Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index f9ffe8e..8509907 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -73,6 +73,8 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct 
amdgpu_device *adev)
vram_usagebyfirmware);
uint16_t data_offset;
int usage_bytes = 0;
+   u64 start_addr;
+   u64 size;
 
if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, 
_offset)) {
struct vram_usagebyfirmware_v2_1 *firmware_usage =
@@ -83,6 +85,18 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct 
amdgpu_device *adev)
  le16_to_cpu(firmware_usage->used_by_firmware_in_kb),
  le16_to_cpu(firmware_usage->used_by_driver_in_kb));
 
+   start_addr = firmware_usage->start_address_in_kb;
+   size = firmware_usage->used_by_firmware_in_kb;
+
+   if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
+   (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION 
<<
+   ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
+   /* Firmware request VRAM reservation for SR-IOV */
+   adev->fw_vram_usage.start_offset = (start_addr &
+   (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+   adev->fw_vram_usage.size = size << 10;
+   /* Use the default scratch size */
+   }
usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) 
* 1024;
}
ctx->scratch_size_bytes = 0;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/powerplay: wrong control mode cause the fan spins faster unnecessarily

2017-11-01 Thread Evan Quan
The fan control mode can either be FDO_PWM_MODE_STATIC or 
FDO_PWM_MODE_STATIC_RPM.
Setting it as AMD_FAN_CTRL_AUTO will cause the fan spin faster wrongly.

This can be reproduced by:
'# cat /sys/class/hwmon/hwmon0/pwm1
   38
'# cat /sys/class/hwmon/hwmon0/pwm1_enable
   2
'# echo "2" > /sys/class/hwmon/hwmon0/pwm1_enable
'# cat /sys/class/hwmon/hwmon0/pwm1
   122
The fan speed get faster wronly even with its original mode echo back.

Change-Id: I51b0586ac17e0d663925c87d548847b31c79d68a
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 203ef10..4239b98 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -4234,7 +4234,7 @@ static void vega10_set_fan_control_mode(struct pp_hwmgr 
*hwmgr, uint32_t mode)
vega10_fan_ctrl_stop_smc_fan_control(hwmgr);
break;
case AMD_FAN_CTRL_AUTO:
-   if (!vega10_fan_ctrl_set_static_mode(hwmgr, mode))
+   if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl))
vega10_fan_ctrl_start_smc_fan_control(hwmgr);
break;
default:
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/3] drm/amdgpu: change interface when firmware reserving vram

2017-11-01 Thread Horace Chen
use amdgpu_bo_create_vram_restricted_kernel to reserve bo at
specified position and keep the original data intact.

Signed-off-by: Horace Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 44 ++
 1 file changed, 8 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2ff2c54..c0c6eab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -687,45 +687,17 @@ int amdgpu_fw_reserve_vram_init(struct amdgpu_device 
*adev)
adev->fw_vram_usage.reserved_bo = NULL;
 
if (adev->fw_vram_usage.size > 0 &&
-   adev->fw_vram_usage.size <= vram_size) {
+   adev->fw_vram_usage.size <= vram_size) {
+   r = amdgpu_bo_create_vram_restricted_kernel(adev,
+adev->fw_vram_usage.start_offset,
+adev->fw_vram_usage.size, PAGE_SIZE,
+AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
+>fw_vram_usage.reserved_bo,
+_addr, >fw_vram_usage.va);
 
-   r = amdgpu_bo_create(adev, adev->fw_vram_usage.size,
-   PAGE_SIZE, true, 0,
-   AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-   AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0,
-   >fw_vram_usage.reserved_bo);
-   if (r)
-   goto error_create;
-
-   r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false);
-   if (r)
-   goto error_reserve;
-   r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo,
-   AMDGPU_GEM_DOMAIN_VRAM,
-   adev->fw_vram_usage.start_offset,
-   (adev->fw_vram_usage.start_offset +
-   adev->fw_vram_usage.size), _addr);
-   if (r)
-   goto error_pin;
-   r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
-   >fw_vram_usage.va);
-   if (r)
-   goto error_kmap;
-
-   amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
}
return r;
-
-error_kmap:
-   amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo);
-error_pin:
-   amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
-error_reserve:
-   amdgpu_bo_unref(>fw_vram_usage.reserved_bo);
-error_create:
-   adev->fw_vram_usage.va = NULL;
-   adev->fw_vram_usage.reserved_bo = NULL;
-   return r;
 }
 
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/3] drm/amdgpu: Add interface to protect VRAM at exact position

2017-11-01 Thread Christian König

Am 01.11.2017 um 09:58 schrieb Horace Chen:

The existing method to reserve specified VRAM is to create a bo on
system domain then pin it to VRAM. But in this process the existing
data on the VRAM will be broken, because ttm will allocate a zero
buffer on system domain then copy the zeroes to VRAM.

Actually SRIOV need to reserve VRAM to protect data at exact
position.

So here add a new interface to create bo at the exact position
directly and then pin it immediately to avoid eviction and avoid
breaking the existing data on the VRAM.

Signed-off-by: Horace Chen 


Modifying amdgpu_bo_do_create is still a NAK from myside, that function 
is why to complicated as it already is.


If you need to assign a specific placement without clearing it you can 
call ttm_bo_mem_space() manually to do so.


E.g. something like this:

ttm_bo_mem_put(>tbo, >tbo.mem);
r = ttm_bo_mem_space(>tbo, , >tbo.mem, false, false);

That assigns the new memory without any copy operation.

Regards,
Christian.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 84 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  4 ++
  2 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index e44b880..7108dc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -281,7 +281,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 
*gpu_addr,
*cpu_addr = NULL;
  }
  
-static int amdgpu_bo_do_create(struct amdgpu_device *adev,

+static int amdgpu_bo_do_create(struct amdgpu_device *adev, u64 offset,
   unsigned long size, int byte_align,
   bool kernel, u32 domain, u64 flags,
   struct sg_table *sg,
@@ -295,6 +295,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
u64 initial_bytes_moved, bytes_moved;
size_t acc_size;
int r;
+   int i;
  
  	page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;

size = ALIGN(size, PAGE_SIZE);
@@ -364,6 +365,21 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
bo->tbo.bdev = >mman.bdev;
amdgpu_ttm_placement_from_domain(bo, domain);
  
+	/*

+* if offset is not ~0, it means the offset of this bo is restricted.
+* modify the placement to create bo at the exact place.
+*/
+   if (((domain & AMDGPU_GEM_DOMAIN_VRAM) ||
+   (domain & AMDGPU_GEM_DOMAIN_GTT)) && offset != ~0) {
+   offset = ALIGN(offset, PAGE_SIZE);
+   for (i = 0; i < bo->placement.num_placement; ++i) {
+   bo->placements[i].fpfn =
+   offset >> PAGE_SHIFT;
+   bo->placements[i].lpfn =
+   (offset + size) >> PAGE_SHIFT;
+   }
+   }
+
initial_bytes_moved = atomic64_read(>num_bytes_moved);
/* Kernel allocation are uninterruptible */
r = ttm_bo_init_reserved(>mman.bdev, >tbo, size, type,
@@ -416,6 +432,68 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
return r;
  }
  
+/**

+ * amdgpu_bo_create_vram_restricted_kernel -
+ *create BO at the specified place on the VRAM.
+ *
+ * @adev: amdgpu device object
+ * @offset: start offset of the new BO
+ * @size: size for the new BO
+ * @byte_align: alignment for the new BO
+ * @flags: addition flags for the BO
+ * @bo_ptr: resulting BO
+ * @gpu_addr: GPU addr of the pinned BO
+ * @cpu_addr: optional CPU address mapping
+ *
+ * Allocates and pins a BO at the specified place on the VRAM.
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_vram_restricted_kernel(struct amdgpu_device *adev,
+u64 offset, unsigned long size, int byte_align,
+u64 flags, struct amdgpu_bo **bo_ptr,
+u64 *gpu_addr, void **cpu_addr)
+{
+   int r;
+   /* specified memory must be in contiguous*/
+   flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+
+   r = amdgpu_bo_do_create(adev, offset, size, byte_align, true,
+   AMDGPU_GEM_DOMAIN_VRAM, flags, NULL, NULL, 0, bo_ptr);
+   if (r)
+   return r;
+
+   r = amdgpu_bo_reserve(*bo_ptr, false);
+   if (r)
+   goto error_reserve;
+   r = amdgpu_bo_pin_restricted(*bo_ptr,
+   AMDGPU_GEM_DOMAIN_VRAM, offset, (offset + size),
+   gpu_addr);
+   if (r)
+   goto error_pin;
+   if (cpu_addr) {
+   r = amdgpu_bo_kmap(*bo_ptr,
+   cpu_addr);
+   if (r)
+   goto error_kmap;
+   }
+
+   amdgpu_bo_unreserve(*bo_ptr);
+
+   return r;
+error_kmap:
+   amdgpu_bo_unpin(*bo_ptr);
+error_pin:
+   amdgpu_bo_unreserve(*bo_ptr);

[PATCH 1/3] drm/amdgpu: Add interface to protect VRAM at exact position

2017-11-01 Thread Horace Chen
The existing method to reserve specified VRAM is to create a bo on
system domain then pin it to VRAM. But in this process the existing
data on the VRAM will be broken, because ttm will allocate a zero
buffer on system domain then copy the zeroes to VRAM.

Actually SRIOV need to reserve VRAM to protect data at exact
position.

So here add a new interface to create bo at the exact position
directly and then pin it immediately to avoid eviction and avoid
breaking the existing data on the VRAM.

Signed-off-by: Horace Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 84 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  4 ++
 2 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index e44b880..7108dc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -281,7 +281,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 
*gpu_addr,
*cpu_addr = NULL;
 }
 
-static int amdgpu_bo_do_create(struct amdgpu_device *adev,
+static int amdgpu_bo_do_create(struct amdgpu_device *adev, u64 offset,
   unsigned long size, int byte_align,
   bool kernel, u32 domain, u64 flags,
   struct sg_table *sg,
@@ -295,6 +295,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
u64 initial_bytes_moved, bytes_moved;
size_t acc_size;
int r;
+   int i;
 
page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
size = ALIGN(size, PAGE_SIZE);
@@ -364,6 +365,21 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
bo->tbo.bdev = >mman.bdev;
amdgpu_ttm_placement_from_domain(bo, domain);
 
+   /*
+* if offset is not ~0, it means the offset of this bo is restricted.
+* modify the placement to create bo at the exact place.
+*/
+   if (((domain & AMDGPU_GEM_DOMAIN_VRAM) ||
+   (domain & AMDGPU_GEM_DOMAIN_GTT)) && offset != ~0) {
+   offset = ALIGN(offset, PAGE_SIZE);
+   for (i = 0; i < bo->placement.num_placement; ++i) {
+   bo->placements[i].fpfn =
+   offset >> PAGE_SHIFT;
+   bo->placements[i].lpfn =
+   (offset + size) >> PAGE_SHIFT;
+   }
+   }
+
initial_bytes_moved = atomic64_read(>num_bytes_moved);
/* Kernel allocation are uninterruptible */
r = ttm_bo_init_reserved(>mman.bdev, >tbo, size, type,
@@ -416,6 +432,68 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
return r;
 }
 
+/**
+ * amdgpu_bo_create_vram_restricted_kernel -
+ *create BO at the specified place on the VRAM.
+ *
+ * @adev: amdgpu device object
+ * @offset: start offset of the new BO
+ * @size: size for the new BO
+ * @byte_align: alignment for the new BO
+ * @flags: addition flags for the BO
+ * @bo_ptr: resulting BO
+ * @gpu_addr: GPU addr of the pinned BO
+ * @cpu_addr: optional CPU address mapping
+ *
+ * Allocates and pins a BO at the specified place on the VRAM.
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_vram_restricted_kernel(struct amdgpu_device *adev,
+u64 offset, unsigned long size, int byte_align,
+u64 flags, struct amdgpu_bo **bo_ptr,
+u64 *gpu_addr, void **cpu_addr)
+{
+   int r;
+   /* specified memory must be in contiguous*/
+   flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+
+   r = amdgpu_bo_do_create(adev, offset, size, byte_align, true,
+   AMDGPU_GEM_DOMAIN_VRAM, flags, NULL, NULL, 0, bo_ptr);
+   if (r)
+   return r;
+
+   r = amdgpu_bo_reserve(*bo_ptr, false);
+   if (r)
+   goto error_reserve;
+   r = amdgpu_bo_pin_restricted(*bo_ptr,
+   AMDGPU_GEM_DOMAIN_VRAM, offset, (offset + size),
+   gpu_addr);
+   if (r)
+   goto error_pin;
+   if (cpu_addr) {
+   r = amdgpu_bo_kmap(*bo_ptr,
+   cpu_addr);
+   if (r)
+   goto error_kmap;
+   }
+
+   amdgpu_bo_unreserve(*bo_ptr);
+
+   return r;
+error_kmap:
+   amdgpu_bo_unpin(*bo_ptr);
+error_pin:
+   amdgpu_bo_unreserve(*bo_ptr);
+error_reserve:
+   amdgpu_bo_unref(bo_ptr);
+   if (cpu_addr)
+   *cpu_addr = NULL;
+   if (gpu_addr)
+   *gpu_addr = 0;
+   return r;
+}
+
 static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
   unsigned long size, int byte_align,
   struct amdgpu_bo *bo)
@@ -425,7 +503,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device 
*adev,
if (bo->shadow)
   

Re: [Mesa-dev] [PATCH] amdgpu: Add R600_DEBUG flag to reserve VMID per ctx.

2017-11-01 Thread Christian König

I'm not 100% sure that patch was correct.

When is amdgpu_ctx_create() called? The VMID is reserved for the whole 
process, not just a context.


Regards,
Christian.

Am 31.10.2017 um 16:57 schrieb Marek Olšák:

I addressed the feedback and pushed the patch.

Marek

On Tue, Oct 31, 2017 at 4:50 PM, Michel Dänzer  wrote:

On 31/10/17 04:40 PM, Andrey Grodzovsky wrote:

Signed-off-by: Andrey Grodzovsky 

[...]


diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 8f43e93..1155492 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -256,6 +256,14 @@ static struct radeon_winsys_ctx *amdgpu_ctx_create(struct 
radeon_winsys *ws)
goto error_create;
 }

+   if (ctx->ws->reserve_vmid) {
+r = amdgpu_vm_reserve_vmid(ctx->ctx, 0);
+if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", r);

This should say "amdgpu: amdgpu_vm_reserve_vmid failed. (%i)\n".


--
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: KASAN use-after-free report during piglit run

2017-11-01 Thread Christian König

Am 31.10.2017 um 18:58 schrieb Michel Dänzer:

On 25/10/17 05:43 PM, Michel Dänzer wrote:

KASAN caught another use-after-free on my development machine today, see
the attached dmesg excerpt. There haven't been any related changes in
amd-staging-drm-next since yesterday, so maybe userspace is just
tickling the kernel differently (e.g. piglit runs some more tests in
parallel now). It's not reproducible every time, but it just happened a
second time (with an amd-staging-drm-next commit from about a week ago).

I took a closer look, and I think I see what's happening. The
use-after-free happens at:

   reservation_object_wait_timeout_rcu+0xe02/0xe90
   ttm_bo_cleanup_refs_and_unlock+0x271/0x990 [ttm] (ttm_bo.c:530)
   ttm_mem_evict_first+0x263/0x4a0 [ttm]

The memory was freed at:

   [reservation_object_fini]
   ttm_bo_cleanup_refs_and_unlock+0x517/0x990 [ttm] (ttm_bo.c:564)
   ttm_mem_evict_first+0x263/0x4a0 [ttm]

So it's two processes handling the same BO in ttm_mem_evict_first ->
ttm_bo_cleanup_refs_and_unlock. The first one unreserved the BO before
calling reservation_object_wait_timeout_rcu. Meanwhile, the other one
manages to reserve the BO and get all the way to the end of
ttm_bo_cleanup_refs_and_unlock, destroying bo->ttm_resv. Then
reservation_object_wait_timeout_rcu in the first process still accesses
memory which bo->ttm_resv pointed to => boom.


Good catch. But this means that just grabbing another reference before 
calling reservation_object_wait_timeout_rcu() and we should be on the 
safe side, shouldn't we ?


Going to take a closer look tomorrow, today is a holiday here and I'm 
actually ill again once more :(


Christian.

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] amdgpu: Add R600_DEBUG flag to reserve VMID per ctx.

2017-11-01 Thread Christian König

Am 31.10.2017 um 16:50 schrieb Michel Dänzer:

On 31/10/17 04:40 PM, Andrey Grodzovsky wrote:

Signed-off-by: Andrey Grodzovsky 

[...]


diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 8f43e93..1155492 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -256,6 +256,14 @@ static struct radeon_winsys_ctx *amdgpu_ctx_create(struct 
radeon_winsys *ws)
goto error_create;
 }
  
+   if (ctx->ws->reserve_vmid) {

+  r = amdgpu_vm_reserve_vmid(ctx->ctx, 0);
+  if (r) {
+   fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", 
r);

This should say "amdgpu: amdgpu_vm_reserve_vmid failed. (%i)\n".



Not sure when amdgpu_ctx_create() is called, but the VMID is reserved 
per process not per context.


Christian.

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu:cleanup deprecated gpu reset logic

2017-11-01 Thread Christian König

Am 01.11.2017 um 04:29 schrieb Liu, Monk:

The thing is triggering gpu_recover() in irq routine give you NULL for the 
@bad/job parameter, so gpu_recover() actually did nothing meaningful, it just 
repeat scheduling un-signaled jobs again and again, and finally your GPU is 
stuck with infinite recovering 


Yeah, completely agree that this isn't a good implementation. But we 
need to somehow do better than that.



here is my initial thought:
In " gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry 
*entry)" routine:
For lockup_timeout==0 case: we put a new parameter "@sched" to 
amd_gpu_recover(), that way although we don't have @bad/job, but with @sched we can still 
find out the pending job on his scheduler,
But that make code ugly, cuz we need to change amdgpu_gpu_recover(adev, job) to 
amdgpu_gpu_recover(adev, job, sched). Anyway need to find a way to tell 
gpu_recover() at least which ring has problem.

For lockup_timeout!=0 case: did nothing and just return since TDR will correct 
this error and do recover() gracefully.


I think waiting for the TDR will take to long. How about this instead:

1. In gfx_v9_0_priv_reg_irq() (and all the other IRQ handlers as well) 
we add functionality to figure out the ring/scheduler which caused the 
illegal operation.


2. Then we add a new function amd_sched_job_fault() which gets the 
scheduler which had a fault as parameter.


3. amd_sched_job_fault() then grabs job_list_lock and takes the first 
job from the ring_mirror_list.


4. We call cancel_delayed_work() to cancel the timeout of the job.

5. If cancel_delayed_work() returns true (which means the TDR isn't 
already running anyway) we call schedule_delayed_work() with a zero timeout.


This way the actual timeout value is truncated to zero and we run the 
recovery immediately just in the same way as it would when we have waited.


Should be trivial to implement actually. What do you think?

Regards,
Christian.





-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
Sent: 2017年10月31日 23:01
To: Liu, Monk ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/amdgpu:cleanup deprecated gpu reset logic

Am 31.10.2017 um 12:55 schrieb Monk Liu:

trigger gpu reset/recovery from illegle instruction IRQ is deprecated
long time ago, we already switch to recover gpu by TDR triggering.

now please set lockup_timeout to non-zero value in driver loading to
enable TDR.

The patch is ok, but NAK to the general approach. We should have illegal 
instruction/access alongside the timeout.

But instead of trying to trigger the reset directly inform the scheduler that 
the we detected a problem on the engine. The scheduler can then cancel the 
timeout and do the appropriate things.

This patch would be ok if you install this new functionality directly after 
removing the old (and broken) one.

Regards,
Christian.


Change-Id: I45a576a97fd9859e1098e785ce857c2cf5adfba5
Signed-off-by: Monk Liu 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 -
   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 21 -
   drivers/gpu/drm/amd/amdgpu/cik_sdma.c   |  1 -
   drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c   |  2 --
   drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   |  3 ---
   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   |  2 --
   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   |  2 --
   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c  |  1 -
   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c  |  1 -
   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  |  1 -
   drivers/gpu/drm/amd/amdgpu/si_dma.c |  1 -
   11 files changed, 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6e89be5..b3453e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1459,7 +1459,6 @@ struct amdgpu_device {
boolshutdown;
boolneed_dma32;
boolaccel_working;
-   struct work_struct  reset_work;
struct notifier_block   acpi_nb;
struct amdgpu_i2c_chan  *i2c_bus[AMDGPU_MAX_I2C_BUS];
struct amdgpu_debugfs   debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index c340774..989b530 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -73,23 +73,6 @@ static void amdgpu_hotplug_work_func(struct work_struct 
*work)
drm_helper_hpd_irq_event(dev);
   }
   
-/**

- * amdgpu_irq_reset_work_func - execute gpu reset
- *
- * @work: work struct
- *
- * Execute scheduled gpu reset (cayman+).
- * This function is called when the irq handler
- * thinks we need a gpu reset.
- */
-static void amdgpu_irq_reset_work_func(struct work_struct *work) -{
-   struct amdgpu_device *adev = container_of(work, struct 

Re: [PATCH] drm/amdgpu: release exclusive mode after hw_init if no kfd

2017-11-01 Thread Oded Gabbay
Tom,
AFAIK, you can't manually load the amdkfd driver. i.e. it simply won't
work, with or without this change.
It must be loaded in a certain order (in relation to amdgpu and
amd_iommu_v2) during Linux boot.
That issue is one that has been with us from the start and has
received justified criticism over the years.
Unfortunately, no one bothered to fix it yet.

Oded

On Wed, Nov 1, 2017 at 3:25 AM, Ding, Pixel  wrote:
>
> I’m not for sure about this case you talked about. Assume that it could 
> happen and the KFD probe and init are invoked when loading it manually.
>
> For baremetal device, it’s always correct.
> For SRIOV virtual function, it doesn’t behave correctly with or without this 
> patch. KFD initialization also needs to access VF in exclusive mode, while 
> the exclusive mode request/release messages are sent in amdgpu_device_init.
>
> —
> Sincerely Yours,
> Pixel
>
>
>
>
>
>
>
> On 31/10/2017, 11:06 PM, "Tom Stellard"  wrote:
>
> >On 10/30/2017 12:57 AM, Pixel Ding wrote:
> >> From: pding 
> >>
> >> Move kfd probe prior to device init. Release exclusive mode
> >> after hw_init if kfd is not enabled.
> >>
> >
> >What happens if only the amdgpu module is loaded at startup, and then the
> >user manually loads the amdkfd module at some point later on.  Will the 
> >driver
> >still behave correctly in this case with this patch?
> >
> >-Tom
> >
> >
> >> Signed-off-by: pding 
> >> ---
> >>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
> >>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 5 +++--
> >>  2 files changed, 6 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> index 400dfaa..e46ec51 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> @@ -1716,6 +1716,9 @@ static int amdgpu_init(struct amdgpu_device *adev)
> >>  adev->ip_blocks[i].status.hw = true;
> >>  }
> >>
> >> +if (amdgpu_sriov_vf(adev) && !adev->kfd)
> >> +amdgpu_virt_release_full_gpu(adev, true);
> >> +
> >>  return 0;
> >>  }
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> >> index 3e9760d..e91907c 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> >> @@ -138,6 +138,8 @@ int amdgpu_driver_load_kms(struct drm_device *dev, 
> >> unsigned long flags)
> >>  !pci_is_thunderbolt_attached(dev->pdev))
> >>  flags |= AMD_IS_PX;
> >>
> >> +amdgpu_amdkfd_device_probe(adev);
> >> +
> >>  /* amdgpu_device_init should report only fatal error
> >>   * like memory allocation failure or iomapping failure,
> >>   * or memory manager initialization failure, it must
> >> @@ -170,7 +172,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, 
> >> unsigned long flags)
> >>  "Error during ACPI methods call\n");
> >>  }
> >>
> >> -amdgpu_amdkfd_device_probe(adev);
> >>  amdgpu_amdkfd_device_init(adev);
> >>
> >>  if (amdgpu_device_is_px(dev)) {
> >> @@ -182,7 +183,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, 
> >> unsigned long flags)
> >>  pm_runtime_put_autosuspend(dev->dev);
> >>  }
> >>
> >> -if (amdgpu_sriov_vf(adev))
> >> +if (amdgpu_sriov_vf(adev) && adev->kfd)
> >>  amdgpu_virt_release_full_gpu(adev, true);
> >>
> >>  out:
> >>
> >
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 00/17] KFD interrupt and signal event handling improvements v2

2017-11-01 Thread Oded Gabbay
Hi,
I've taken the patch-set to -next.
Oded

On Sat, Oct 28, 2017 at 2:35 AM, Felix Kuehling 
wrote:

> This patch series improves interrupt handling latency, signal event
> processing overhead and replaces some custom data structures with
> standard kernel data structures (idr, kfifo, waitqueue).
>
> It also increases the capacity of the number of signals that can be
> processed from 256 to 4096. This breaks ancient versions of the Thunk
> that support only 256 signal events. The current WIP-version on github
> supports both sizes. If support for ancient Thunks is considered
> important, this could be fixed by allowing mappings that are smaller
> than 4096 signals, and limiting the number of signals per process
> depending on the size of the mapped events page.
>
> v2:
>  * Don't break ABI when changing KFD_SIGNAL_EVENT_LIMIT
>  * Integrated review feedback for "Use wait_queue_t to implement event
> waiting"
>  * Integrated review feedback for "Don't dereference kfd_process.mm"
>
> Andres Rodriguez (4):
>   drm/amdkfd: use standard kernel kfifo for IH
>   drm/amdkfd: increase IH num entries to 8192
>   drm/amdkfd: wait only for IH work on IH exit
>   drm/amdkfd: use a high priority workqueue for IH work
>
> Besar Wicaksono (1):
>   drm/amdkfd: Add SDMA trap src id to the KFD isr wanted list
>
> Felix Kuehling (9):
>   drm/amdkfd: Don't dereference kfd_process.mm v2
>   drm/amdkfd: Clean up kfd_wait_on_events
>   drm/amdkfd: Fix event destruction with pending waiters
>   drm/amdkfd: remove redundant kfd_event_waiter.input_index
>   drm/amdkfd: Use wait_queue_t to implement event waiting
>   drm/amdkfd: Simplify events page allocator
>   drm/amdkfd: Simplify event ID and signal slot management
>   drm/amdkfd: Use IH context ID for signal lookup
>   drm/amdkfd: Make event limit dependent on user mode mapping size
>
> Oded Gabbay (1):
>   drm/amdkfd: increase limit of signal events to 4096 per process
>
> Sean Keely (2):
>   drm/amdkfd: Short cut for kfd_wait_on_events without waiting
>   drm/amdkfd: Fix scheduler race in kfd_wait_on_events sleep loop
>
>  drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c |   8 +-
>  drivers/gpu/drm/amd/amdkfd/cik_int.h |   3 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |   5 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c  |   2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_events.c  | 615
> +++
>  drivers/gpu/drm/amd/amdkfd/kfd_events.h  |  18 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c   |  83 ++-
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  33 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c |   1 -
>  include/uapi/linux/kfd_ioctl.h   |   2 +-
>  10 files changed, 350 insertions(+), 420 deletions(-)
>
> --
> 2.7.4
>
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/radeon: deprecate and remove KFD interface

2017-11-01 Thread Oded Gabbay
ok, taken to -next.

On Tue, Oct 31, 2017 at 4:56 PM, Christian König <
ckoenig.leichtzumer...@gmail.com> wrote:

> Am 31.10.2017 um 11:44 schrieb Oded Gabbay:
>
>> Don't have any strong objection, but I just want to ask if current
>> users can just move to using amdgpu on KV and their current usermode
>> stack will work as usual.
>>
>
> Yes, I think so.
>
> btw, are there any "current users" that you are aware of ?
>>
>
> Not the slightest idea, but from Felix comments at least the current user
> mode stack + old kernel won't work.
>
> Regards,
> Christian.
>
>
>
>>
>> On Mon, Oct 30, 2017 at 3:16 PM, Christian König
>>  wrote:
>>
>>> From: Christian König 
>>>
>>> To quote Felix: "For testing KV with current user mode stack, please use
>>> amdgpu. I don't expect this to work with radeon and I'm not planning to
>>> spend
>>> any effort on making radeon work with a current user mode stack."
>>>
>>> Only compile tested, but should be straight forward.
>>>
>>> Signed-off-by: Christian König 
>>> ---
>>>   drivers/gpu/drm/amd/amdkfd/Kconfig  |   2 +-
>>>   drivers/gpu/drm/radeon/Makefile |   3 +-
>>>   drivers/gpu/drm/radeon/cik.c|  14 +-
>>>   drivers/gpu/drm/radeon/cikd.h   |   2 -
>>>   drivers/gpu/drm/radeon/radeon.h |   3 -
>>>   drivers/gpu/drm/radeon/radeon_drv.c |  10 -
>>>   drivers/gpu/drm/radeon/radeon_kfd.c | 901
>>> 
>>>   drivers/gpu/drm/radeon/radeon_kfd.h |  47 --
>>>   drivers/gpu/drm/radeon/radeon_kms.c |   7 -
>>>   9 files changed, 4 insertions(+), 985 deletions(-)
>>>   delete mode 100644 drivers/gpu/drm/radeon/radeon_kfd.c
>>>   delete mode 100644 drivers/gpu/drm/radeon/radeon_kfd.h
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig
>>> b/drivers/gpu/drm/amd/amdkfd/Kconfig
>>> index e13c67c..bc5a294 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/Kconfig
>>> +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
>>> @@ -4,6 +4,6 @@
>>>
>>>   config HSA_AMD
>>>  tristate "HSA kernel driver for AMD GPU devices"
>>> -   depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64
>>> +   depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64
>>>  help
>>>Enable this if you want to use HSA features on AMD GPU
>>> devices.
>>> diff --git a/drivers/gpu/drm/radeon/Makefile
>>> b/drivers/gpu/drm/radeon/Makefile
>>> index be16c63..cf3e598 100644
>>> --- a/drivers/gpu/drm/radeon/Makefile
>>> +++ b/drivers/gpu/drm/radeon/Makefile
>>> @@ -102,8 +102,7 @@ radeon-y += \
>>>   radeon-y += \
>>>  radeon_vce.o \
>>>  vce_v1_0.o \
>>> -   vce_v2_0.o \
>>> -   radeon_kfd.o
>>> +   vce_v2_0.o
>>>
>>>   radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
>>>   radeon-$(CONFIG_ACPI) += radeon_acpi.o
>>> diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
>>> index 3cb6c55..898f9a0 100644
>>> --- a/drivers/gpu/drm/radeon/cik.c
>>> +++ b/drivers/gpu/drm/radeon/cik.c
>>> @@ -33,7 +33,6 @@
>>>   #include "cik_blit_shaders.h"
>>>   #include "radeon_ucode.h"
>>>   #include "clearstate_ci.h"
>>> -#include "radeon_kfd.h"
>>>
>>>   #define SH_MEM_CONFIG_GFX_DEFAULT \
>>>  ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
>>> @@ -5684,10 +5683,9 @@ int cik_vm_init(struct radeon_device *rdev)
>>>  /*
>>>   * number of VMs
>>>   * VMID 0 is reserved for System
>>> -* radeon graphics/compute will use VMIDs 1-7
>>> -* amdkfd will use VMIDs 8-15
>>> +* radeon graphics/compute will use VMIDs 1-15
>>>   */
>>> -   rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
>>> +   rdev->vm_manager.nvm = 16;
>>>  /* base offset of vram pages */
>>>  if (rdev->flags & RADEON_IS_IGP) {
>>>  u64 tmp = RREG32(MC_VM_FB_OFFSET);
>>> @@ -7589,9 +7587,6 @@ int cik_irq_process(struct radeon_device *rdev)
>>>  /* wptr/rptr are in bytes! */
>>>  ring_index = rptr / 4;
>>>
>>> -   radeon_kfd_interrupt(rdev,
>>> -   (const void *)
>>> >ih.ring[ring_index]);
>>> -
>>>  src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) &
>>> 0xff;
>>>  src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1])
>>> & 0xfff;
>>>  ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) &
>>> 0xff;
>>> @@ -8486,10 +8481,6 @@ static int cik_startup(struct radeon_device *rdev)
>>>  if (r)
>>>  return r;
>>>
>>> -   r = radeon_kfd_resume(rdev);
>>> -   if (r)
>>> -   return r;
>>> -
>>>  return 0;
>>>   }
>>>
>>> @@ -8538,7 +8529,6 @@ int cik_resume(struct radeon_device *rdev)
>>>*/
>>>   int cik_suspend(struct radeon_device *rdev)
>>>   {
>>> -   radeon_kfd_suspend(rdev);
>>>  radeon_pm_suspend(rdev);
>>>  radeon_audio_fini(rdev);
>>>  radeon_vm_manager_fini(rdev);
>>> diff --git 

Re: release exclusive mode after hw init if no kfd

2017-11-01 Thread Ding, Pixel
Hi Oded,

Please ignore them so far. I need to consider more like that the probe is 
included in retry init logics.
— 
Sincerely Yours,
Pixel








On 01/11/2017, 1:53 PM, "amd-gfx on behalf of Pixel Ding" 
 wrote:

>Hi Oded,
>
>Please review.
>
>[PATCH 1/2] drm/amdkfd: initialise kgd field inside kfd device_init
>As you suggested, move kgd assignment to device_init
>
>[PATCH 2/2] drm/amdgpu: release exclusive mode after hw_init if no
>We still need this change because pdev is passed in.
>___
>amd-gfx mailing list
>amd-gfx@lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx