[PATCH 1/2] drm/amdkfd: initialise kgd field inside kfd device_init
From: pding kgd field is dependent on kgd device_init. Move the assignment to kfd device_init. Signed-off-by: pding --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 8 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 +++--- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 4 ++-- drivers/gpu/drm/radeon/radeon_kfd.c | 6 +++--- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index c70cda0..5b10ce9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -89,8 +89,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) return; } - adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, - adev->pdev, kfd2kgd); + adev->kfd = kgd2kfd->probe(adev->pdev, kfd2kgd); } void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) @@ -131,7 +130,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) &gpu_resources.doorbell_aperture_size, &gpu_resources.doorbell_start_offset); - kgd2kfd->device_init(adev->kfd, &gpu_resources); + kgd2kfd->device_init(adev->kfd, (struct kgd_dev *)adev, +&gpu_resources); } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 5df12b2..b95fc61 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -109,8 +109,8 @@ static const struct kfd_device_info *lookup_device_info(unsigned short did) return NULL; } -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, - struct pci_dev *pdev, const struct kfd2kgd_calls *f2g) +struct kfd_dev *kgd2kfd_probe(struct pci_dev *pdev, + const struct kfd2kgd_calls *f2g) { struct kfd_dev *kfd; @@ -126,7 +126,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, if (!kfd) return NULL; - kfd->kgd = kgd; kfd->device_info = device_info; kfd->pdev = pdev; kfd->init_complete = false; @@ -217,11 +216,12 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, return AMD_IOMMU_INV_PRI_RSP_INVALID; } -bool kgd2kfd_device_init(struct kfd_dev *kfd, +bool kgd2kfd_device_init(struct kfd_dev *kfd, struct kgd_dev *kgd, const struct kgd2kfd_shared_resources *gpu_resources) { unsigned int size; + kfd->kgd = kgd; kfd->shared_resources = *gpu_resources; /* calculate max size of mqds needed for queues */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 4cb90f5..cf97e7f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -200,9 +200,9 @@ struct kfd_dev { /* KGD2KFD callbacks */ void kgd2kfd_exit(void); -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, - struct pci_dev *pdev, const struct kfd2kgd_calls *f2g); -bool kgd2kfd_device_init(struct kfd_dev *kfd, +struct kfd_dev *kgd2kfd_probe(struct pci_dev *pdev, + const struct kfd2kgd_calls *f2g); +bool kgd2kfd_device_init(struct kfd_dev *kfd, struct kgd_dev *kgd, const struct kgd2kfd_shared_resources *gpu_resources); void kgd2kfd_device_exit(struct kfd_dev *kfd); diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index f516fd1..f9ca238 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -248,9 +248,9 @@ struct kfd2kgd_calls { */ struct kgd2kfd_calls { void (*exit)(void); - struct kfd_dev* (*probe)(struct kgd_dev *kgd, struct pci_dev *pdev, + struct kfd_dev* (*probe)(struct pci_dev *pdev, const struct kfd2kgd_calls *f2g); - bool (*device_init)(struct kfd_dev *kfd, + bool (*device_init)(struct kfd_dev *kfd, struct kgd_dev *kgd, const struct kgd2kfd_shared_resources *gpu_resources); void (*device_exit)(struct kfd_dev *kfd); void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry); diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index 385b4d7..083deb1 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -181,8 +181,7 @@ void radeon_kfd_fini(void) void radeon_kfd_device_probe(struct radeon_device *rdev) { if (kgd2kfd) - rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, - rdev->pdev, &kfd2kgd); + rdev->kfd = kgd2kfd->probe(rdev->pdev, &kfd2kgd); }
[PATCH 2/2] drm/amdgpu: release exclusive mode after hw_init if no kfd (v2)
From: pding Move kfd probe prior to device init. Release exclusive mode after hw_init if kfd is not enabled. v2: - pass pdev param Signed-off-by: pding --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 5 +++-- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 5b10ce9..83d18c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -68,7 +68,8 @@ void amdgpu_amdkfd_fini(void) } } -void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) +void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev, + struct pci_dev *pdev) { const struct kfd2kgd_calls *kfd2kgd; @@ -89,7 +90,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) return; } - adev->kfd = kgd2kfd->probe(adev->pdev, kfd2kgd); + adev->kfd = kgd2kfd->probe(pdev, kfd2kgd); } void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 8d689ab..707c892 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -44,7 +44,8 @@ void amdgpu_amdkfd_suspend(struct amdgpu_device *adev); int amdgpu_amdkfd_resume(struct amdgpu_device *adev); void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry); -void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); +void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev, + struct pci_dev *pdev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 068b56a..ef01aa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1716,6 +1716,9 @@ static int amdgpu_init(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = true; } + if (amdgpu_sriov_vf(adev) && !adev->kfd) + amdgpu_virt_release_full_gpu(adev, true); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 3e9760d..f872052 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -138,6 +138,8 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) !pci_is_thunderbolt_attached(dev->pdev)) flags |= AMD_IS_PX; + amdgpu_amdkfd_device_probe(adev, dev->pdev); + /* amdgpu_device_init should report only fatal error * like memory allocation failure or iomapping failure, * or memory manager initialization failure, it must @@ -170,7 +172,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) "Error during ACPI methods call\n"); } - amdgpu_amdkfd_device_probe(adev); amdgpu_amdkfd_device_init(adev); if (amdgpu_device_is_px(dev)) { @@ -182,7 +183,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) pm_runtime_put_autosuspend(dev->dev); } - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev) && adev->kfd) amdgpu_virt_release_full_gpu(adev, true); out: -- 2.9.5 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
release exclusive mode after hw init if no kfd
Hi Oded, Please review. [PATCH 1/2] drm/amdkfd: initialise kgd field inside kfd device_init As you suggested, move kgd assignment to device_init [PATCH 2/2] drm/amdgpu: release exclusive mode after hw_init if no We still need this change because pdev is passed in. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu:read VRAMLOST from gim
Change-Id: I6a268903465004d6e8f65f135734094772b9f614 Signed-off-by: Monk Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 - drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 3 +++ 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 068b56a..eccb3fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2953,11 +2953,10 @@ static int amdgpu_reset_sriov(struct amdgpu_device *adev, uint64_t *reset_flags, amdgpu_virt_release_full_gpu(adev, true); if (reset_flags) { - /* will get vram_lost from GIM in future, now all -* reset request considered VRAM LOST -*/ - (*reset_flags) |= ~AMDGPU_RESET_INFO_VRAM_LOST; - atomic_inc(&adev->vram_lost_counter); + if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { + (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST; + atomic_inc(&adev->vram_lost_counter); + } /* VF FLR or hotlink reset is always full-reset */ (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index f791518..9cd030a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -332,6 +332,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) pf2vf_ver = adev->virt.fw_reserve.p_pf2vf->version; AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, &pf2vf_size); AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, &checksum); + AMDGPU_FW_VRAM_PF2VF_READ(adev, feature_flags, &adev->virt.gim_feature); /* pf2vf message must be in 4K */ if (pf2vf_size > 0 && pf2vf_size < 4096) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index e3f78f5..f77d116 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -81,6 +81,8 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_ERROR_LOG_COLLECT = 0x1, /* GIM supports feature of loading uCodes */ AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2, + /* VRAM LOST by GIM */ + AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4, }; struct amdgim_pf2vf_info_header { @@ -246,6 +248,7 @@ struct amdgpu_virt { const struct amdgpu_virt_ops*ops; struct amdgpu_vf_error_buffer vf_errors; struct amdgpu_virt_fw_reserve fw_reserve; + uint32_t gim_feature; }; #define AMDGPU_CSA_SIZE(8 * 1024) -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 1/2] drm/amdgpu:add fw-vram-usage for atomfirmware
Yeah, I'll cleanup them before pushing -Original Message- From: Deucher, Alexander Sent: 2017年10月31日 23:01 To: Liu, Monk ; amd-gfx@lists.freedesktop.org Cc: Liu, Monk Subject: RE: [PATCH 1/2] drm/amdgpu:add fw-vram-usage for atomfirmware > -Original Message- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf > Of Monk Liu > Sent: Tuesday, October 31, 2017 7:55 AM > To: amd-gfx@lists.freedesktop.org > Cc: Liu, Monk > Subject: [PATCH 1/2] drm/amdgpu:add fw-vram-usage for atomfirmware > > otherwise PF & VF exchange is broken > > Change-Id: Icbb44e640ba1c6e61914cbd234e92de001496195 > Signed-off-by: Monk Liu > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 26 > > 1 file changed, 22 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c > index f9ffe8e..455ad63 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c > @@ -71,19 +71,37 @@ int > amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) > struct atom_context *ctx = adev->mode_info.atom_context; > int index = > get_index_into_master_table(atom_master_list_of_data_tables_v2_1, > vram_usagebyfirmware); > + struct vram_usagebyfirmware_v2_1 * firmware_usage; > + uint32_t start_addr, size; > uint16_t data_offset; > int usage_bytes = 0; > > if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, > &data_offset)) { > - struct vram_usagebyfirmware_v2_1 *firmware_usage = > - (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + > data_offset); > + printk("data_offset=%x\n", data_offset); Drop this debugging leftover. > + firmware_usage = (struct vram_usagebyfirmware_v2_1 > *)(ctx->bios + data_offset); > > - DRM_DEBUG("atom firmware requested %08x %dkb fw > %dkb drv\n", > + DRM_INFO("atom firmware requested %08x %dkb fw %dkb Any reason to make this non-debug? > drv\n", > le32_to_cpu(firmware_usage- > >start_address_in_kb), > le16_to_cpu(firmware_usage- > >used_by_firmware_in_kb), > le16_to_cpu(firmware_usage- > >used_by_driver_in_kb)); > > - usage_bytes = le16_to_cpu(firmware_usage- > >used_by_driver_in_kb) * 1024; > + start_addr = le32_to_cpu(firmware_usage- > >start_address_in_kb); > + size = le16_to_cpu(firmware_usage- > >used_by_firmware_in_kb); > + > + printk("start_addr = %x, size=%x\n", start_addr, size); More debug leftovers. With the above comments addressed, the patch is: Reviewed-by: Alex Deucher > + > + if ((uint32_t)(start_addr & > ATOM_VRAM_OPERATION_FLAGS_MASK) == > + > (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATIO > N << > + ATOM_VRAM_OPERATION_FLAGS_SHIFT)) { > + /* Firmware request VRAM reservation for SR-IOV */ > + adev->fw_vram_usage.start_offset = (start_addr & > + > (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; > + adev->fw_vram_usage.size = size << 10; > + /* Use the default scratch size */ > + usage_bytes = 0; > + } else { > + usage_bytes = le16_to_cpu(firmware_usage- > >used_by_driver_in_kb) << 10; > + } > } > ctx->scratch_size_bytes = 0; > if (usage_bytes == 0) > -- > 2.7.4 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 2/2] drm/amdgpu:cleanup deprecated gpu reset logic
The thing is triggering gpu_recover() in irq routine give you NULL for the @bad/job parameter, so gpu_recover() actually did nothing meaningful, it just repeat scheduling un-signaled jobs again and again, and finally your GPU is stuck with infinite recovering here is my initial thought: In " gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry)" routine: For lockup_timeout==0 case: we put a new parameter "@sched" to amd_gpu_recover(), that way although we don't have @bad/job, but with @sched we can still find out the pending job on his scheduler, But that make code ugly, cuz we need to change amdgpu_gpu_recover(adev, job) to amdgpu_gpu_recover(adev, job, sched). Anyway need to find a way to tell gpu_recover() at least which ring has problem. For lockup_timeout!=0 case: did nothing and just return since TDR will correct this error and do recover() gracefully. -Original Message- From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] Sent: 2017年10月31日 23:01 To: Liu, Monk ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 2/2] drm/amdgpu:cleanup deprecated gpu reset logic Am 31.10.2017 um 12:55 schrieb Monk Liu: > trigger gpu reset/recovery from illegle instruction IRQ is deprecated > long time ago, we already switch to recover gpu by TDR triggering. > > now please set lockup_timeout to non-zero value in driver loading to > enable TDR. The patch is ok, but NAK to the general approach. We should have illegal instruction/access alongside the timeout. But instead of trying to trigger the reset directly inform the scheduler that the we detected a problem on the engine. The scheduler can then cancel the timeout and do the appropriate things. This patch would be ok if you install this new functionality directly after removing the old (and broken) one. Regards, Christian. > > Change-Id: I45a576a97fd9859e1098e785ce857c2cf5adfba5 > Signed-off-by: Monk Liu > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - > drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 21 - > drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 1 - > drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 2 -- > drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 3 --- > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 -- > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 -- > drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 1 - > drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 1 - > drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 1 - > drivers/gpu/drm/amd/amdgpu/si_dma.c | 1 - > 11 files changed, 36 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 6e89be5..b3453e3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1459,7 +1459,6 @@ struct amdgpu_device { > boolshutdown; > boolneed_dma32; > boolaccel_working; > - struct work_struct reset_work; > struct notifier_block acpi_nb; > struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; > struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c > index c340774..989b530 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c > @@ -73,23 +73,6 @@ static void amdgpu_hotplug_work_func(struct work_struct > *work) > drm_helper_hpd_irq_event(dev); > } > > -/** > - * amdgpu_irq_reset_work_func - execute gpu reset > - * > - * @work: work struct > - * > - * Execute scheduled gpu reset (cayman+). > - * This function is called when the irq handler > - * thinks we need a gpu reset. > - */ > -static void amdgpu_irq_reset_work_func(struct work_struct *work) -{ > - struct amdgpu_device *adev = container_of(work, struct amdgpu_device, > - reset_work); > - > - if (!amdgpu_sriov_vf(adev)) > - amdgpu_gpu_recover(adev, NULL); > -} > > /* Disable *all* interrupts */ > static void amdgpu_irq_disable_all(struct amdgpu_device *adev) @@ > -251,14 +234,11 @@ int amdgpu_irq_init(struct amdgpu_device *adev) > amdgpu_hotplug_work_func); > } > > - INIT_WORK(&adev->reset_work, amdgpu_irq_reset_work_func); > - > adev->irq.installed = true; > r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); > if (r) { > adev->irq.installed = false; > flush_work(&adev->hotplug_work); > - cancel_work_sync(&adev->reset_work); > return r; > } > > @@ -283,7 +263,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) > if (adev->irq.msi_enabled) > pci_disable_msi(adev->pdev); > flush_work(&adev->hotplug_work);
RE: [PATCH] drm/amd/powerplay: fix memory leak of hardcoded pptable
Reviewed-by: Rex Zhu Best Regards Rex -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Eric Huang Sent: Wednesday, November 01, 2017 5:40 AM To: amd-gfx@lists.freedesktop.org Cc: Huang, JinHuiEric Subject: [PATCH] drm/amd/powerplay: fix memory leak of hardcoded pptable Signed-off-by: Eric Huang --- drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index d4ff742..9d3bdad 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -78,6 +78,9 @@ static int amd_powerplay_destroy(void *handle) { struct pp_instance *instance = (struct pp_instance *)handle; + kfree(instance->hwmgr->hardcode_pp_table); + instance->hwmgr->hardcode_pp_table = NULL; + kfree(instance->hwmgr); instance->hwmgr = NULL; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: release exclusive mode after hw_init if no kfd
I’m not for sure about this case you talked about. Assume that it could happen and the KFD probe and init are invoked when loading it manually. For baremetal device, it’s always correct. For SRIOV virtual function, it doesn’t behave correctly with or without this patch. KFD initialization also needs to access VF in exclusive mode, while the exclusive mode request/release messages are sent in amdgpu_device_init. — Sincerely Yours, Pixel On 31/10/2017, 11:06 PM, "Tom Stellard" wrote: >On 10/30/2017 12:57 AM, Pixel Ding wrote: >> From: pding >> >> Move kfd probe prior to device init. Release exclusive mode >> after hw_init if kfd is not enabled. >> > >What happens if only the amdgpu module is loaded at startup, and then the >user manually loads the amdkfd module at some point later on. Will the driver >still behave correctly in this case with this patch? > >-Tom > > >> Signed-off-by: pding >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 5 +++-- >> 2 files changed, 6 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> index 400dfaa..e46ec51 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> @@ -1716,6 +1716,9 @@ static int amdgpu_init(struct amdgpu_device *adev) >> adev->ip_blocks[i].status.hw = true; >> } >> >> +if (amdgpu_sriov_vf(adev) && !adev->kfd) >> +amdgpu_virt_release_full_gpu(adev, true); >> + >> return 0; >> } >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >> index 3e9760d..e91907c 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >> @@ -138,6 +138,8 @@ int amdgpu_driver_load_kms(struct drm_device *dev, >> unsigned long flags) >> !pci_is_thunderbolt_attached(dev->pdev)) >> flags |= AMD_IS_PX; >> >> +amdgpu_amdkfd_device_probe(adev); >> + >> /* amdgpu_device_init should report only fatal error >> * like memory allocation failure or iomapping failure, >> * or memory manager initialization failure, it must >> @@ -170,7 +172,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, >> unsigned long flags) >> "Error during ACPI methods call\n"); >> } >> >> -amdgpu_amdkfd_device_probe(adev); >> amdgpu_amdkfd_device_init(adev); >> >> if (amdgpu_device_is_px(dev)) { >> @@ -182,7 +183,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, >> unsigned long flags) >> pm_runtime_put_autosuspend(dev->dev); >> } >> >> -if (amdgpu_sriov_vf(adev)) >> +if (amdgpu_sriov_vf(adev) && adev->kfd) >> amdgpu_virt_release_full_gpu(adev, true); >> >> out: >> > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amd/powerplay: fix memory leak of hardcoded pptable
Signed-off-by: Eric Huang --- drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index d4ff742..9d3bdad 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -78,6 +78,9 @@ static int amd_powerplay_destroy(void *handle) { struct pp_instance *instance = (struct pp_instance *)handle; + kfree(instance->hwmgr->hardcode_pp_table); + instance->hwmgr->hardcode_pp_table = NULL; + kfree(instance->hwmgr); instance->hwmgr = NULL; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/radeon: fix atombios on big endian
On Mon, Oct 30, 2017 at 6:56 AM, Roman Kapl wrote: > The function for byteswapping the data send to/from atombios was buggy for > num_bytes not divisible by four. The function must be aware of the fact > that after byte-swapping the u32 units, valid bytes might end up after the > num_bytes boundary. > > This patch was tested on kernel 3.12 and allowed us to sucesfully use > DisplayPort on and Radeon SI card. Namely it fixed the link training and > EDID readout. > > The function is patched both in radeon and amd drivers, since the functions > and the fixes are identical. > > Signed-off-by: Roman Kapl Applied. thanks! Alex > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 38 > +--- > drivers/gpu/drm/radeon/atombios_dp.c | 38 > +--- > 2 files changed, 36 insertions(+), 40 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c > index ce443586a0c7..cc4e18dcd8b6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c > @@ -1766,34 +1766,32 @@ bool amdgpu_atombios_scratch_need_asic_init(struct > amdgpu_device *adev) > return true; > } > > -/* Atom needs data in little endian format > - * so swap as appropriate when copying data to > - * or from atom. Note that atom operates on > - * dw units. > +/* Atom needs data in little endian format so swap as appropriate when > copying > + * data to or from atom. Note that atom operates on dw units. > + * > + * Use to_le=true when sending data to atom and provide at least > + * ALIGN(num_bytes,4) bytes in the dst buffer. > + * > + * Use to_le=false when receiving data from atom and provide > ALIGN(num_bytes,4) > + * byes in the src buffer. > */ > void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) > { > #ifdef __BIG_ENDIAN > - u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ > - u32 *dst32, *src32; > + u32 src_tmp[5], dst_tmp[5]; > int i; > + u8 align_num_bytes = ALIGN(num_bytes, 4); > > - memcpy(src_tmp, src, num_bytes); > - src32 = (u32 *)src_tmp; > - dst32 = (u32 *)dst_tmp; > if (to_le) { > - for (i = 0; i < ((num_bytes + 3) / 4); i++) > - dst32[i] = cpu_to_le32(src32[i]); > - memcpy(dst, dst_tmp, num_bytes); > + memcpy(src_tmp, src, num_bytes); > + for (i = 0; i < align_num_bytes / 4; i++) > + dst_tmp[i] = cpu_to_le32(src_tmp[i]); > + memcpy(dst, dst_tmp, align_num_bytes); > } else { > - u8 dws = num_bytes & ~3; > - for (i = 0; i < ((num_bytes + 3) / 4); i++) > - dst32[i] = le32_to_cpu(src32[i]); > - memcpy(dst, dst_tmp, dws); > - if (num_bytes % 4) { > - for (i = 0; i < (num_bytes % 4); i++) > - dst[dws+i] = dst_tmp[dws+i]; > - } > + memcpy(src_tmp, src, align_num_bytes); > + for (i = 0; i < align_num_bytes / 4; i++) > + dst_tmp[i] = le32_to_cpu(src_tmp[i]); > + memcpy(dst, dst_tmp, num_bytes); > } > #else > memcpy(dst, src, num_bytes); > diff --git a/drivers/gpu/drm/radeon/atombios_dp.c > b/drivers/gpu/drm/radeon/atombios_dp.c > index 432cb46f6a34..fd7682bf335d 100644 > --- a/drivers/gpu/drm/radeon/atombios_dp.c > +++ b/drivers/gpu/drm/radeon/atombios_dp.c > @@ -45,34 +45,32 @@ static char *pre_emph_names[] = { > > /* radeon AUX functions */ > > -/* Atom needs data in little endian format > - * so swap as appropriate when copying data to > - * or from atom. Note that atom operates on > - * dw units. > +/* Atom needs data in little endian format so swap as appropriate when > copying > + * data to or from atom. Note that atom operates on dw units. > + * > + * Use to_le=true when sending data to atom and provide at least > + * ALIGN(num_bytes,4) bytes in the dst buffer. > + * > + * Use to_le=false when receiving data from atom and provide > ALIGN(num_bytes,4) > + * byes in the src buffer. > */ > void radeon_atom_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) > { > #ifdef __BIG_ENDIAN > - u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ > - u32 *dst32, *src32; > + u32 src_tmp[5], dst_tmp[5]; > int i; > + u8 align_num_bytes = ALIGN(num_bytes, 4); > > - memcpy(src_tmp, src, num_bytes); > - src32 = (u32 *)src_tmp; > - dst32 = (u32 *)dst_tmp; > if (to_le) { > - for (i = 0; i < ((num_bytes + 3) / 4); i++) > - dst32[i] = cpu_to_le32(src32[i]); > - memcpy(dst, dst_tmp, num_bytes); > + memcpy(src_tmp, src, num_bytes); > + for (i = 0; i < align_num_bytes / 4; i++) > +
[PATCH V2 2/2] drm/amd/display: Read resource_straps from registers for DCE12
From: "Leo (Sunpeng) Li" Now that the registers exist, assign them to the resource_straps struct. v2: Fix indentation Signed-off-by: Leo (Sunpeng) Li --- .../gpu/drm/amd/display/dc/dce120/dce120_resource.c| 18 -- 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 3ed28a8..15509a8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -501,12 +501,18 @@ static void read_dce_straps( struct dc_context *ctx, struct resource_straps *straps) { - /* TODO: Registers are missing */ - /*REG_GET_2(CC_DC_HDMI_STRAPS, - HDMI_DISABLE, &straps->hdmi_disable, - AUDIO_STREAM_NUMBER, &straps->audio_stream_number); - - REG_GET(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO, &straps->dc_pinstraps_audio);*/ + uint32_t reg_val = dm_read_reg_soc15(ctx, mmCC_DC_MISC_STRAPS, 0); + straps->audio_stream_number = get_reg_field_value(reg_val, + CC_DC_MISC_STRAPS, + AUDIO_STREAM_NUMBER); + straps->hdmi_disable = get_reg_field_value(reg_val, + CC_DC_MISC_STRAPS, + HDMI_DISABLE); + + reg_val = dm_read_reg_soc15(ctx, mmDC_PINSTRAPS, 0); + straps->dc_pinstraps_audio = get_reg_field_value(reg_val, +DC_PINSTRAPS, +DC_PINSTRAPS_AUDIO); } static struct audio *create_audio( -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amd/display: Read resource_straps from registers for DCE12
On 2017-10-31 04:28 PM, sunpeng...@amd.com wrote: > From: "Leo (Sunpeng) Li" > > Now that the registers exist, assign them to the resource_straps struct. > > Signed-off-by: Leo (Sunpeng) Li > --- > drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c | 15 +-- > 1 file changed, 9 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c > b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c > index 3ed28a8..5023a03 100644 > --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c > +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c > @@ -501,12 +501,15 @@ static void read_dce_straps( > struct dc_context *ctx, > struct resource_straps *straps) > { > - /* TODO: Registers are missing */ > - /*REG_GET_2(CC_DC_HDMI_STRAPS, > - HDMI_DISABLE, &straps->hdmi_disable, > - AUDIO_STREAM_NUMBER, &straps->audio_stream_number); > - > - REG_GET(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO, > &straps->dc_pinstraps_audio);*/ > + uint32_t reg_val = dm_read_reg_soc15(ctx, mmCC_DC_MISC_STRAPS, 0); > + straps->audio_stream_number = get_reg_field_value( > + reg_val, CC_DC_MISC_STRAPS, AUDIO_STREAM_NUMBER); > + straps->hdmi_disable = get_reg_field_value( > + reg_val, CC_DC_MISC_STRAPS, HDMI_DISABLE); The indentation looks wrong. I know we have the same in other parts of this code but let's try to consider this for new code. In particular reg_val should be on the same line as the function call, the rest should be on new lines, aligned with tabs and spaces to reg_val. > + > + reg_val = dm_read_reg_soc15(ctx, mmDC_PINSTRAPS, 0); > + straps->dc_pinstraps_audio = get_reg_field_value( > + reg_val, DC_PINSTRAPS, DC_PINSTRAPS_AUDIO); Nitpick on indentation here as well. With those nitpicks addressed the series is Reviewed-by: Harry Wentland Harry > } > > static struct audio *create_audio( > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] drm/amd/display: Read resource_straps from registers for DCE12
From: "Leo (Sunpeng) Li" Now that the registers exist, assign them to the resource_straps struct. Signed-off-by: Leo (Sunpeng) Li --- drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 3ed28a8..5023a03 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -501,12 +501,15 @@ static void read_dce_straps( struct dc_context *ctx, struct resource_straps *straps) { - /* TODO: Registers are missing */ - /*REG_GET_2(CC_DC_HDMI_STRAPS, - HDMI_DISABLE, &straps->hdmi_disable, - AUDIO_STREAM_NUMBER, &straps->audio_stream_number); - - REG_GET(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO, &straps->dc_pinstraps_audio);*/ + uint32_t reg_val = dm_read_reg_soc15(ctx, mmCC_DC_MISC_STRAPS, 0); + straps->audio_stream_number = get_reg_field_value( + reg_val, CC_DC_MISC_STRAPS, AUDIO_STREAM_NUMBER); + straps->hdmi_disable = get_reg_field_value( + reg_val, CC_DC_MISC_STRAPS, HDMI_DISABLE); + + reg_val = dm_read_reg_soc15(ctx, mmDC_PINSTRAPS, 0); + straps->dc_pinstraps_audio = get_reg_field_value( + reg_val, DC_PINSTRAPS, DC_PINSTRAPS_AUDIO); } static struct audio *create_audio( -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] drm/amd: Add DCE12 resource strap registers
From: "Leo (Sunpeng) Li" We need them for initializing audio properly. Signed-off-by: Leo (Sunpeng) Li Reviewed-by: Harry Wentland Acked-by: Alex Deucher --- drivers/gpu/drm/amd/include/asic_reg/vega10/DC/dce_12_0_offset.h | 4 drivers/gpu/drm/amd/include/asic_reg/vega10/DC/dce_12_0_sh_mask.h | 8 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/include/asic_reg/vega10/DC/dce_12_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/vega10/DC/dce_12_0_offset.h index 75b660d..f730d06 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vega10/DC/dce_12_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vega10/DC/dce_12_0_offset.h @@ -1841,6 +1841,10 @@ #define mmUNIPHYG_CHANNEL_XBAR_CNTL_BASE_IDX 2 #define mmDCIO_WRCMD_DELAY 0x2094 #define mmDCIO_WRCMD_DELAY_BASE_IDX 2 +#define mmDC_PINSTRAPS 0x2096 +#define mmDC_PINSTRAPS_BASE_IDX 2 +#define mmCC_DC_MISC_STRAPS 0x2097 +#define mmCC_DC_MISC_STRAPS_BASE_IDX 2 #define mmDC_DVODATA_CONFIG 0x2098 #define mmDC_DVODATA_CONFIG_BASE_IDX 2 #define mmLVTMA_PWRSEQ_CNTL 0x2099 diff --git a/drivers/gpu/drm/amd/include/asic_reg/vega10/DC/dce_12_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/vega10/DC/dce_12_0_sh_mask.h index d8ad862..6d3162c 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vega10/DC/dce_12_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vega10/DC/dce_12_0_sh_mask.h @@ -2447,6 +2447,14 @@ //DCCG_CBUS_WRCMD_DELAY #define DCCG_CBUS_WRCMD_DELAY__CBUS_PLL_WRCMD_DELAY__SHIFT 0x0 #define DCCG_CBUS_WRCMD_DELAY__CBUS_PLL_WRCMD_DELAY_MASK 0x000FL +//DC_PINSTRAPS +#define DC_PINSTRAPS__DC_PINSTRAPS_AUDIO__SHIFT 0xe +#define DC_PINSTRAPS__DC_PINSTRAPS_AUDIO_MASK 0xC000L +//CC_DC_MISC_STRAPS +#define CC_DC_MISC_STRAPS__HDMI_DISABLE__SHIFT 0x6 +#define CC_DC_MISC_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8 +#define CC_DC_MISC_STRAPS__HDMI_DISABLE_MASK 0x0040L +#define CC_DC_MISC_STRAPS__AUDIO_STREAM_NUMBER_MASK 0x0700L //DCCG_DS_DTO_INCR #define DCCG_DS_DTO_INCR__DCCG_DS_DTO_INCR__SHIFT 0x0 #define DCCG_DS_DTO_INCR__DCCG_DS_DTO_INCR_MASK 0xL -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 0/2] Enable HDMI audio on DCE12
From: "Leo (Sunpeng) Li" This series properly enables audio on DCE12 by adding the relevant registers, and reading them into the software structs. Previously, this was only working because the registers were being ignored. Now that they're being considered, HDMI audio on DCE12 stopped working due to missing code that fetched the necessary registers. Leo (Sunpeng) Li (2): drm/amd: Add DCE12 resource strap registers drm/amd/display: Read resource_straps from registers for DCE12 drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c | 15 +-- .../drm/amd/include/asic_reg/vega10/DC/dce_12_0_offset.h | 4 .../drm/amd/include/asic_reg/vega10/DC/dce_12_0_sh_mask.h | 8 3 files changed, 21 insertions(+), 6 deletions(-) -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: KASAN use-after-free report during piglit run
On 25/10/17 05:43 PM, Michel Dänzer wrote: > > KASAN caught another use-after-free on my development machine today, see > the attached dmesg excerpt. There haven't been any related changes in > amd-staging-drm-next since yesterday, so maybe userspace is just > tickling the kernel differently (e.g. piglit runs some more tests in > parallel now). It's not reproducible every time, but it just happened a > second time (with an amd-staging-drm-next commit from about a week ago). I took a closer look, and I think I see what's happening. The use-after-free happens at: reservation_object_wait_timeout_rcu+0xe02/0xe90 ttm_bo_cleanup_refs_and_unlock+0x271/0x990 [ttm] (ttm_bo.c:530) ttm_mem_evict_first+0x263/0x4a0 [ttm] The memory was freed at: [reservation_object_fini] ttm_bo_cleanup_refs_and_unlock+0x517/0x990 [ttm] (ttm_bo.c:564) ttm_mem_evict_first+0x263/0x4a0 [ttm] So it's two processes handling the same BO in ttm_mem_evict_first -> ttm_bo_cleanup_refs_and_unlock. The first one unreserved the BO before calling reservation_object_wait_timeout_rcu. Meanwhile, the other one manages to reserve the BO and get all the way to the end of ttm_bo_cleanup_refs_and_unlock, destroying bo->ttm_resv. Then reservation_object_wait_timeout_rcu in the first process still accesses memory which bo->ttm_resv pointed to => boom. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [Mesa-dev] [PATCH] amdgpu: Add R600_DEBUG flag to reserve VMID per ctx.
I addressed the feedback and pushed the patch. Marek On Tue, Oct 31, 2017 at 4:50 PM, Michel Dänzer wrote: > On 31/10/17 04:40 PM, Andrey Grodzovsky wrote: >> Signed-off-by: Andrey Grodzovsky > > [...] > >> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c >> b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c >> index 8f43e93..1155492 100644 >> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c >> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c >> @@ -256,6 +256,14 @@ static struct radeon_winsys_ctx >> *amdgpu_ctx_create(struct radeon_winsys *ws) >>goto error_create; >> } >> >> + if (ctx->ws->reserve_vmid) { >> +r = amdgpu_vm_reserve_vmid(ctx->ctx, 0); >> +if (r) { >> + fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", >> r); > > This should say "amdgpu: amdgpu_vm_reserve_vmid failed. (%i)\n". > > > -- > Earthling Michel Dänzer | http://www.amd.com > Libre software enthusiast | Mesa and X developer > ___ > mesa-dev mailing list > mesa-...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] amdgpu: Add R600_DEBUG flag to reserve VMID per ctx.
On 2017-10-31 11:50 AM, Samuel Pitoiset wrote: On 10/31/2017 04:40 PM, Andrey Grodzovsky wrote: Signed-off-by: Andrey Grodzovsky --- configure.ac | 2 +- src/gallium/drivers/radeon/r600_pipe_common.c | 1 + src/gallium/drivers/radeon/r600_pipe_common.h | 1 + src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 8 src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 3 +++ src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 1 + src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 1 + 7 files changed, 16 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 9aa02f5..efc653a 100644 --- a/configure.ac +++ b/configure.ac @@ -74,7 +74,7 @@ AC_SUBST([OPENCL_VERSION]) # in the first entry. LIBDRM_REQUIRED=2.4.75 LIBDRM_RADEON_REQUIRED=2.4.71 -LIBDRM_AMDGPU_REQUIRED=2.4.85 +LIBDRM_AMDGPU_REQUIRED=2.4.86 LIBDRM_INTEL_REQUIRED=2.4.75 LIBDRM_NVVIEUX_REQUIRED=2.4.66 LIBDRM_NOUVEAU_REQUIRED=2.4.66 diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index b77d859..3364dac 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -851,6 +851,7 @@ static const struct debug_named_value common_debug_options[] = { { "dpbb", DBG(DPBB), "Enable DPBB." }, { "dfsm", DBG(DFSM), "Enable DFSM." }, { "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization" }, +{ "reserve_vmid", DBG(RESERVE_VMID), "Force VMID resrvation per context." }, "reservation". Can you also explain a bit what that stuff is? :) It allows to have fixed VMID assigned to a process, otherwise each time a command is processed the next available VMID is assigned according to a policy in KMD (amdgpu_vm_grab_id). Thanks, Andrey Thanks! DEBUG_NAMED_VALUE_END /* must be last */ }; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index a7c91cb..94c8d4f 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -107,6 +107,7 @@ enum { DBG_NO_DISCARD_RANGE, DBG_NO_WC, DBG_CHECK_VM, +DBG_RESERVE_VMID, /* 3D engine options: */ DBG_SWITCH_ON_EOP, diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 8f43e93..1155492 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -256,6 +256,14 @@ static struct radeon_winsys_ctx *amdgpu_ctx_create(struct radeon_winsys *ws) goto error_create; } + if (ctx->ws->reserve_vmid) { + r = amdgpu_vm_reserve_vmid(ctx->ctx, 0); + if (r) { +fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", r); +goto error_create; + } + } + alloc_buffer.alloc_size = ctx->ws->info.gart_page_size; alloc_buffer.phys_alignment = ctx->ws->info.gart_page_size; alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h index 1c3d0f0..d023841 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h @@ -162,6 +162,9 @@ static inline bool amdgpu_fence_is_syncobj(struct amdgpu_fence *fence) static inline void amdgpu_ctx_unref(struct amdgpu_ctx *ctx) { if (p_atomic_dec_zero(&ctx->refcount)) { + if (ctx->ws->reserve_vmid) + amdgpu_vm_unreserve_vmid(ctx->ctx, 0); + amdgpu_cs_ctx_free(ctx->ctx); amdgpu_bo_free(ctx->user_fence_bo); FREE(ctx); diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index a210a27..b80a988 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -73,6 +73,7 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int fd) ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL; ws->debug_all_bos = debug_get_option_all_bos(); + ws->reserve_vmid = strstr(debug_get_option("R600_DEBUG", ""), "reserve_vmid") != NULL; return true; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h index 8b62e2d..b4a3422 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h @@ -81,6 +81,7 @@ struct amdgpu_winsys { bool check_vm; bool debug_all_bos; + bool reserve_vmid; /* List of all allocated buffers */ mtx_t global_bo_list_lock; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] amdgpu: Add R600_DEBUG flag to reserve VMID per ctx.
On 31/10/17 04:40 PM, Andrey Grodzovsky wrote: > Signed-off-by: Andrey Grodzovsky [...] > diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c > b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c > index 8f43e93..1155492 100644 > --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c > +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c > @@ -256,6 +256,14 @@ static struct radeon_winsys_ctx > *amdgpu_ctx_create(struct radeon_winsys *ws) >goto error_create; > } > > + if (ctx->ws->reserve_vmid) { > +r = amdgpu_vm_reserve_vmid(ctx->ctx, 0); > +if (r) { > + fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", > r); This should say "amdgpu: amdgpu_vm_reserve_vmid failed. (%i)\n". -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] amdgpu: Add R600_DEBUG flag to reserve VMID per ctx.
On 10/31/2017 04:40 PM, Andrey Grodzovsky wrote: Signed-off-by: Andrey Grodzovsky --- configure.ac | 2 +- src/gallium/drivers/radeon/r600_pipe_common.c | 1 + src/gallium/drivers/radeon/r600_pipe_common.h | 1 + src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 8 src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 3 +++ src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 1 + src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 1 + 7 files changed, 16 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 9aa02f5..efc653a 100644 --- a/configure.ac +++ b/configure.ac @@ -74,7 +74,7 @@ AC_SUBST([OPENCL_VERSION]) # in the first entry. LIBDRM_REQUIRED=2.4.75 LIBDRM_RADEON_REQUIRED=2.4.71 -LIBDRM_AMDGPU_REQUIRED=2.4.85 +LIBDRM_AMDGPU_REQUIRED=2.4.86 LIBDRM_INTEL_REQUIRED=2.4.75 LIBDRM_NVVIEUX_REQUIRED=2.4.66 LIBDRM_NOUVEAU_REQUIRED=2.4.66 diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index b77d859..3364dac 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -851,6 +851,7 @@ static const struct debug_named_value common_debug_options[] = { { "dpbb", DBG(DPBB), "Enable DPBB." }, { "dfsm", DBG(DFSM), "Enable DFSM." }, { "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization" }, + { "reserve_vmid", DBG(RESERVE_VMID), "Force VMID resrvation per context." }, "reservation". Can you also explain a bit what that stuff is? :) Thanks! DEBUG_NAMED_VALUE_END /* must be last */ }; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index a7c91cb..94c8d4f 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -107,6 +107,7 @@ enum { DBG_NO_DISCARD_RANGE, DBG_NO_WC, DBG_CHECK_VM, + DBG_RESERVE_VMID, /* 3D engine options: */ DBG_SWITCH_ON_EOP, diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 8f43e93..1155492 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -256,6 +256,14 @@ static struct radeon_winsys_ctx *amdgpu_ctx_create(struct radeon_winsys *ws) goto error_create; } + if (ctx->ws->reserve_vmid) { + r = amdgpu_vm_reserve_vmid(ctx->ctx, 0); + if (r) { + fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", r); + goto error_create; + } + } + alloc_buffer.alloc_size = ctx->ws->info.gart_page_size; alloc_buffer.phys_alignment = ctx->ws->info.gart_page_size; alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h index 1c3d0f0..d023841 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h @@ -162,6 +162,9 @@ static inline bool amdgpu_fence_is_syncobj(struct amdgpu_fence *fence) static inline void amdgpu_ctx_unref(struct amdgpu_ctx *ctx) { if (p_atomic_dec_zero(&ctx->refcount)) { + if (ctx->ws->reserve_vmid) + amdgpu_vm_unreserve_vmid(ctx->ctx, 0); + amdgpu_cs_ctx_free(ctx->ctx); amdgpu_bo_free(ctx->user_fence_bo); FREE(ctx); diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index a210a27..b80a988 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -73,6 +73,7 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int fd) ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL; ws->debug_all_bos = debug_get_option_all_bos(); + ws->reserve_vmid = strstr(debug_get_option("R600_DEBUG", ""), "reserve_vmid") != NULL; return true; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h index 8b62e2d..b4a3422 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h @@ -81,6 +81,7 @@ struct amdgpu_winsys { bool check_vm; bool debug_all_bos; + bool reserve_vmid; /* List of all allocated buffers */ mtx_t global_bo_list_lock; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] amdgpu: Add R600_DEBUG flag to reserve VMID per ctx.
Signed-off-by: Andrey Grodzovsky --- configure.ac | 2 +- src/gallium/drivers/radeon/r600_pipe_common.c | 1 + src/gallium/drivers/radeon/r600_pipe_common.h | 1 + src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 8 src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 3 +++ src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 1 + src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 1 + 7 files changed, 16 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 9aa02f5..efc653a 100644 --- a/configure.ac +++ b/configure.ac @@ -74,7 +74,7 @@ AC_SUBST([OPENCL_VERSION]) # in the first entry. LIBDRM_REQUIRED=2.4.75 LIBDRM_RADEON_REQUIRED=2.4.71 -LIBDRM_AMDGPU_REQUIRED=2.4.85 +LIBDRM_AMDGPU_REQUIRED=2.4.86 LIBDRM_INTEL_REQUIRED=2.4.75 LIBDRM_NVVIEUX_REQUIRED=2.4.66 LIBDRM_NOUVEAU_REQUIRED=2.4.66 diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index b77d859..3364dac 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -851,6 +851,7 @@ static const struct debug_named_value common_debug_options[] = { { "dpbb", DBG(DPBB), "Enable DPBB." }, { "dfsm", DBG(DFSM), "Enable DFSM." }, { "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization" }, + { "reserve_vmid", DBG(RESERVE_VMID), "Force VMID resrvation per context." }, DEBUG_NAMED_VALUE_END /* must be last */ }; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index a7c91cb..94c8d4f 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -107,6 +107,7 @@ enum { DBG_NO_DISCARD_RANGE, DBG_NO_WC, DBG_CHECK_VM, + DBG_RESERVE_VMID, /* 3D engine options: */ DBG_SWITCH_ON_EOP, diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 8f43e93..1155492 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -256,6 +256,14 @@ static struct radeon_winsys_ctx *amdgpu_ctx_create(struct radeon_winsys *ws) goto error_create; } + if (ctx->ws->reserve_vmid) { + r = amdgpu_vm_reserve_vmid(ctx->ctx, 0); + if (r) { + fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", r); + goto error_create; + } + } + alloc_buffer.alloc_size = ctx->ws->info.gart_page_size; alloc_buffer.phys_alignment = ctx->ws->info.gart_page_size; alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h index 1c3d0f0..d023841 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h @@ -162,6 +162,9 @@ static inline bool amdgpu_fence_is_syncobj(struct amdgpu_fence *fence) static inline void amdgpu_ctx_unref(struct amdgpu_ctx *ctx) { if (p_atomic_dec_zero(&ctx->refcount)) { + if (ctx->ws->reserve_vmid) + amdgpu_vm_unreserve_vmid(ctx->ctx, 0); + amdgpu_cs_ctx_free(ctx->ctx); amdgpu_bo_free(ctx->user_fence_bo); FREE(ctx); diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index a210a27..b80a988 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -73,6 +73,7 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int fd) ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL; ws->debug_all_bos = debug_get_option_all_bos(); + ws->reserve_vmid = strstr(debug_get_option("R600_DEBUG", ""), "reserve_vmid") != NULL; return true; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h index 8b62e2d..b4a3422 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h @@ -81,6 +81,7 @@ struct amdgpu_winsys { bool check_vm; bool debug_all_bos; + bool reserve_vmid; /* List of all allocated buffers */ mtx_t global_bo_list_lock; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix error handling in amdgpu_bo_do_create
On 31/10/17 04:25 PM, Christian König wrote: > Am 31.10.2017 um 16:14 schrieb Michel Dänzer: >> On 31/10/17 04:03 PM, Christian König wrote: >>> Am 31.10.2017 um 09:43 schrieb Michel Dänzer: On 31/10/17 09:37 AM, Christian König wrote: > From: Christian König > > The bo structure is freed up in case of an error, so we can't do any > accounting if that happens. > > Signed-off-by: Christian König > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > index e44b880eefdd..ff6f842655d1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > @@ -369,6 +369,9 @@ static int amdgpu_bo_do_create(struct > amdgpu_device *adev, > r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, > type, > &bo->placement, page_align, !kernel, NULL, > acc_size, sg, resv, &amdgpu_ttm_bo_destroy); > + if (unlikely(r != 0)) > + return r; > + > bytes_moved = atomic64_read(&adev->num_bytes_moved) - > initial_bytes_moved; > if (adev->mc.visible_vram_size < adev->mc.real_vram_size && > @@ -378,9 +381,6 @@ static int amdgpu_bo_do_create(struct > amdgpu_device *adev, > else > amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0); > - if (unlikely(r != 0)) > - return r; > - > if (kernel) > bo->tbo.priority = 1; > Hmm. If it's at least theoretically possible that ttm_bo_init_reserved moves some BOs before returning an error, we should instead make the accounting safe WRT bo having been freed. But assuming it's not possible, >>> I have made patches for this back in April and send them to the list, >>> but the value is rather low and you need to change a lot of places in >>> other drivers as well. >> At least in this case, simply moving the >> >> if (adev->mc.visible_vram_size < adev->mc.real_vram_size && >> bo->tbo.mem.mem_type == TTM_PL_VRAM && >> bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) >> >> test before the ttm_bo_init_reserved call, and then passing bytes_moved >> or 0 for the third parameter of amdgpu_cs_report_moved_bytes depending >> on its result, would work as well, wouldn't it? > No, that won't work. Neither bo->tbo.mem.mem_type nor bo->tbo.mem.start > are determined yet before the call to ttm_bo_init(). > > The root problem is that we have a mis-assumption about how visible VRAM > works here (e.g. it doesn't handle the case where a BO is partially in > visible VRAM) and that we use the global bytes moved counter as > indicator for the local operation (e.g. to know how many bytes moved the > current client). > > The only real solution is to give ttm_bo_init_* and ttm_bo_validate a > context parameter where we can note how much work we had to do to > fulfill the request. Right, though in the meantime I wonder if over-accounting (i.e. calling amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved)) in the failure case wouldn't be better than not accounting at all. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix error handling in amdgpu_bo_do_create
Am 31.10.2017 um 16:14 schrieb Michel Dänzer: On 31/10/17 04:03 PM, Christian König wrote: Am 31.10.2017 um 09:43 schrieb Michel Dänzer: On 31/10/17 09:37 AM, Christian König wrote: From: Christian König The bo structure is freed up in case of an error, so we can't do any accounting if that happens. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e44b880eefdd..ff6f842655d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -369,6 +369,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, !kernel, NULL, acc_size, sg, resv, &amdgpu_ttm_bo_destroy); + if (unlikely(r != 0)) + return r; + bytes_moved = atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved; if (adev->mc.visible_vram_size < adev->mc.real_vram_size && @@ -378,9 +381,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, else amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0); - if (unlikely(r != 0)) - return r; - if (kernel) bo->tbo.priority = 1; Hmm. If it's at least theoretically possible that ttm_bo_init_reserved moves some BOs before returning an error, we should instead make the accounting safe WRT bo having been freed. But assuming it's not possible, I have made patches for this back in April and send them to the list, but the value is rather low and you need to change a lot of places in other drivers as well. At least in this case, simply moving the if (adev->mc.visible_vram_size < adev->mc.real_vram_size && bo->tbo.mem.mem_type == TTM_PL_VRAM && bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) test before the ttm_bo_init_reserved call, and then passing bytes_moved or 0 for the third parameter of amdgpu_cs_report_moved_bytes depending on its result, would work as well, wouldn't it? No, that won't work. Neither bo->tbo.mem.mem_type nor bo->tbo.mem.start are determined yet before the call to ttm_bo_init(). The root problem is that we have a mis-assumption about how visible VRAM works here (e.g. it doesn't handle the case where a BO is partially in visible VRAM) and that we use the global bytes moved counter as indicator for the local operation (e.g. to know how many bytes moved the current client). The only real solution is to give ttm_bo_init_* and ttm_bo_validate a context parameter where we can note how much work we had to do to fulfill the request. Regards, Christian. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix error handling in amdgpu_bo_do_create
On 31/10/17 04:14 PM, Michel Dänzer wrote: > On 31/10/17 04:03 PM, Christian König wrote: >> Am 31.10.2017 um 09:43 schrieb Michel Dänzer: >>> On 31/10/17 09:37 AM, Christian König wrote: From: Christian König The bo structure is freed up in case of an error, so we can't do any accounting if that happens. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e44b880eefdd..ff6f842655d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -369,6 +369,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, !kernel, NULL, acc_size, sg, resv, &amdgpu_ttm_bo_destroy); + if (unlikely(r != 0)) + return r; + bytes_moved = atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved; if (adev->mc.visible_vram_size < adev->mc.real_vram_size && @@ -378,9 +381,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, else amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0); - if (unlikely(r != 0)) - return r; - if (kernel) bo->tbo.priority = 1; >>> Hmm. If it's at least theoretically possible that ttm_bo_init_reserved >>> moves some BOs before returning an error, we should instead make the >>> accounting safe WRT bo having been freed. But assuming it's not possible, >> >> I have made patches for this back in April and send them to the list, >> but the value is rather low and you need to change a lot of places in >> other drivers as well. > > At least in this case, simply moving the > > if (adev->mc.visible_vram_size < adev->mc.real_vram_size && > bo->tbo.mem.mem_type == TTM_PL_VRAM && > bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) > > test before the ttm_bo_init_reserved call, [...] ... except that's not possible, because at least bo->tbo.mem.start isn't decided yet before calling ttm_bo_init_reserved. Never mind the noise. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix error handling in amdgpu_bo_do_create
On 31/10/17 04:03 PM, Christian König wrote: > Am 31.10.2017 um 09:43 schrieb Michel Dänzer: >> On 31/10/17 09:37 AM, Christian König wrote: >>> From: Christian König >>> >>> The bo structure is freed up in case of an error, so we can't do any >>> accounting if that happens. >>> >>> Signed-off-by: Christian König >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 +++--- >>> 1 file changed, 3 insertions(+), 3 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >>> index e44b880eefdd..ff6f842655d1 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >>> @@ -369,6 +369,9 @@ static int amdgpu_bo_do_create(struct >>> amdgpu_device *adev, >>> r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, >>> &bo->placement, page_align, !kernel, NULL, >>> acc_size, sg, resv, &amdgpu_ttm_bo_destroy); >>> + if (unlikely(r != 0)) >>> + return r; >>> + >>> bytes_moved = atomic64_read(&adev->num_bytes_moved) - >>> initial_bytes_moved; >>> if (adev->mc.visible_vram_size < adev->mc.real_vram_size && >>> @@ -378,9 +381,6 @@ static int amdgpu_bo_do_create(struct >>> amdgpu_device *adev, >>> else >>> amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0); >>> - if (unlikely(r != 0)) >>> - return r; >>> - >>> if (kernel) >>> bo->tbo.priority = 1; >>> >> Hmm. If it's at least theoretically possible that ttm_bo_init_reserved >> moves some BOs before returning an error, we should instead make the >> accounting safe WRT bo having been freed. But assuming it's not possible, > > I have made patches for this back in April and send them to the list, > but the value is rather low and you need to change a lot of places in > other drivers as well. At least in this case, simply moving the if (adev->mc.visible_vram_size < adev->mc.real_vram_size && bo->tbo.mem.mem_type == TTM_PL_VRAM && bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) test before the ttm_bo_init_reserved call, and then passing bytes_moved or 0 for the third parameter of amdgpu_cs_report_moved_bytes depending on its result, would work as well, wouldn't it? -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: release exclusive mode after hw_init if no kfd
On 10/30/2017 12:57 AM, Pixel Ding wrote: > From: pding > > Move kfd probe prior to device init. Release exclusive mode > after hw_init if kfd is not enabled. > What happens if only the amdgpu module is loaded at startup, and then the user manually loads the amdkfd module at some point later on. Will the driver still behave correctly in this case with this patch? -Tom > Signed-off-by: pding > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 5 +++-- > 2 files changed, 6 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 400dfaa..e46ec51 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -1716,6 +1716,9 @@ static int amdgpu_init(struct amdgpu_device *adev) > adev->ip_blocks[i].status.hw = true; > } > > + if (amdgpu_sriov_vf(adev) && !adev->kfd) > + amdgpu_virt_release_full_gpu(adev, true); > + > return 0; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > index 3e9760d..e91907c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > @@ -138,6 +138,8 @@ int amdgpu_driver_load_kms(struct drm_device *dev, > unsigned long flags) > !pci_is_thunderbolt_attached(dev->pdev)) > flags |= AMD_IS_PX; > > + amdgpu_amdkfd_device_probe(adev); > + > /* amdgpu_device_init should report only fatal error >* like memory allocation failure or iomapping failure, >* or memory manager initialization failure, it must > @@ -170,7 +172,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, > unsigned long flags) > "Error during ACPI methods call\n"); > } > > - amdgpu_amdkfd_device_probe(adev); > amdgpu_amdkfd_device_init(adev); > > if (amdgpu_device_is_px(dev)) { > @@ -182,7 +183,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, > unsigned long flags) > pm_runtime_put_autosuspend(dev->dev); > } > > - if (amdgpu_sriov_vf(adev)) > + if (amdgpu_sriov_vf(adev) && adev->kfd) > amdgpu_virt_release_full_gpu(adev, true); > > out: > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix error handling in amdgpu_bo_do_create
Am 31.10.2017 um 09:43 schrieb Michel Dänzer: On 31/10/17 09:37 AM, Christian König wrote: From: Christian König The bo structure is freed up in case of an error, so we can't do any accounting if that happens. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e44b880eefdd..ff6f842655d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -369,6 +369,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, !kernel, NULL, acc_size, sg, resv, &amdgpu_ttm_bo_destroy); + if (unlikely(r != 0)) + return r; + bytes_moved = atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved; if (adev->mc.visible_vram_size < adev->mc.real_vram_size && @@ -378,9 +381,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, else amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0); - if (unlikely(r != 0)) - return r; - if (kernel) bo->tbo.priority = 1; Hmm. If it's at least theoretically possible that ttm_bo_init_reserved moves some BOs before returning an error, we should instead make the accounting safe WRT bo having been freed. But assuming it's not possible, I have made patches for this back in April and send them to the list, but the value is rather low and you need to change a lot of places in other drivers as well. So I've gave up on that set for now because of lack of time. Reviewed-by: Michel Dänzer Thanks for the review, Christian. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 1/2] drm/amdgpu:add fw-vram-usage for atomfirmware
> -Original Message- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf > Of Monk Liu > Sent: Tuesday, October 31, 2017 7:55 AM > To: amd-gfx@lists.freedesktop.org > Cc: Liu, Monk > Subject: [PATCH 1/2] drm/amdgpu:add fw-vram-usage for atomfirmware > > otherwise PF & VF exchange is broken > > Change-Id: Icbb44e640ba1c6e61914cbd234e92de001496195 > Signed-off-by: Monk Liu > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 26 > > 1 file changed, 22 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c > index f9ffe8e..455ad63 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c > @@ -71,19 +71,37 @@ int > amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) > struct atom_context *ctx = adev->mode_info.atom_context; > int index = > get_index_into_master_table(atom_master_list_of_data_tables_v2_1, > vram_usagebyfirmware); > + struct vram_usagebyfirmware_v2_1 * firmware_usage; > + uint32_t start_addr, size; > uint16_t data_offset; > int usage_bytes = 0; > > if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, > &data_offset)) { > - struct vram_usagebyfirmware_v2_1 *firmware_usage = > - (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + > data_offset); > + printk("data_offset=%x\n", data_offset); Drop this debugging leftover. > + firmware_usage = (struct vram_usagebyfirmware_v2_1 > *)(ctx->bios + data_offset); > > - DRM_DEBUG("atom firmware requested %08x %dkb fw > %dkb drv\n", > + DRM_INFO("atom firmware requested %08x %dkb fw %dkb Any reason to make this non-debug? > drv\n", > le32_to_cpu(firmware_usage- > >start_address_in_kb), > le16_to_cpu(firmware_usage- > >used_by_firmware_in_kb), > le16_to_cpu(firmware_usage- > >used_by_driver_in_kb)); > > - usage_bytes = le16_to_cpu(firmware_usage- > >used_by_driver_in_kb) * 1024; > + start_addr = le32_to_cpu(firmware_usage- > >start_address_in_kb); > + size = le16_to_cpu(firmware_usage- > >used_by_firmware_in_kb); > + > + printk("start_addr = %x, size=%x\n", start_addr, size); More debug leftovers. With the above comments addressed, the patch is: Reviewed-by: Alex Deucher > + > + if ((uint32_t)(start_addr & > ATOM_VRAM_OPERATION_FLAGS_MASK) == > + > (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATIO > N << > + ATOM_VRAM_OPERATION_FLAGS_SHIFT)) { > + /* Firmware request VRAM reservation for SR-IOV */ > + adev->fw_vram_usage.start_offset = (start_addr & > + > (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; > + adev->fw_vram_usage.size = size << 10; > + /* Use the default scratch size */ > + usage_bytes = 0; > + } else { > + usage_bytes = le16_to_cpu(firmware_usage- > >used_by_driver_in_kb) << 10; > + } > } > ctx->scratch_size_bytes = 0; > if (usage_bytes == 0) > -- > 2.7.4 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdgpu:cleanup deprecated gpu reset logic
Am 31.10.2017 um 12:55 schrieb Monk Liu: trigger gpu reset/recovery from illegle instruction IRQ is deprecated long time ago, we already switch to recover gpu by TDR triggering. now please set lockup_timeout to non-zero value in driver loading to enable TDR. The patch is ok, but NAK to the general approach. We should have illegal instruction/access alongside the timeout. But instead of trying to trigger the reset directly inform the scheduler that the we detected a problem on the engine. The scheduler can then cancel the timeout and do the appropriate things. This patch would be ok if you install this new functionality directly after removing the old (and broken) one. Regards, Christian. Change-Id: I45a576a97fd9859e1098e785ce857c2cf5adfba5 Signed-off-by: Monk Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 21 - drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 1 - drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 3 --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 1 - drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 1 - drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 1 - drivers/gpu/drm/amd/amdgpu/si_dma.c | 1 - 11 files changed, 36 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6e89be5..b3453e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1459,7 +1459,6 @@ struct amdgpu_device { boolshutdown; boolneed_dma32; boolaccel_working; - struct work_struct reset_work; struct notifier_block acpi_nb; struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index c340774..989b530 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -73,23 +73,6 @@ static void amdgpu_hotplug_work_func(struct work_struct *work) drm_helper_hpd_irq_event(dev); } -/** - * amdgpu_irq_reset_work_func - execute gpu reset - * - * @work: work struct - * - * Execute scheduled gpu reset (cayman+). - * This function is called when the irq handler - * thinks we need a gpu reset. - */ -static void amdgpu_irq_reset_work_func(struct work_struct *work) -{ - struct amdgpu_device *adev = container_of(work, struct amdgpu_device, - reset_work); - - if (!amdgpu_sriov_vf(adev)) - amdgpu_gpu_recover(adev, NULL); -} /* Disable *all* interrupts */ static void amdgpu_irq_disable_all(struct amdgpu_device *adev) @@ -251,14 +234,11 @@ int amdgpu_irq_init(struct amdgpu_device *adev) amdgpu_hotplug_work_func); } - INIT_WORK(&adev->reset_work, amdgpu_irq_reset_work_func); - adev->irq.installed = true; r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); if (r) { adev->irq.installed = false; flush_work(&adev->hotplug_work); - cancel_work_sync(&adev->reset_work); return r; } @@ -283,7 +263,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) if (adev->irq.msi_enabled) pci_disable_msi(adev->pdev); flush_work(&adev->hotplug_work); - cancel_work_sync(&adev->reset_work); } for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) { diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index ed26dcb..c8d9bc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1227,7 +1227,6 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal instruction in SDMA command stream\n"); - schedule_work(&adev->reset_work); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 9430d48..25b32b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3443,7 +3443,6 @@ static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal register access in command stream\n"); - schedule_work(&adev->reset_work); return 0; } @@ -3452,7 +3451,6 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev, struct am
[ANNOUNCE] libdrm 2.4.87
-BEGIN PGP SIGNED MESSAGE- Hash: SHA1 libdrm 2.4.87 has been released. Marek Olšák (2): amdgpu: fix 32-bit build configure.ac: bump version for release git tag: libdrm-2.4.87 https://dri.freedesktop.org/libdrm/libdrm-2.4.87.tar.bz2 MD5: b4f9063838559d08649d45fec2d1184a libdrm-2.4.87.tar.bz2 SHA1: 26b976c8901061399b8b934854b960f5b0761f36 libdrm-2.4.87.tar.bz2 SHA256: 4c1c5293bdbfa248e029d3e6446767e17a3208387a719ec9da2d20c19849ed48 libdrm-2.4.87.tar.bz2 SHA512: 9ce259d38eaffbcaf82a6975ffa513307da6750bcc31a53cade5717f854151b0e7b6d4ce25c0b518cab5a542c544d5984329f216dc4cdc9369b4adae19a68e48 libdrm-2.4.87.tar.bz2 PGP: https://dri.freedesktop.org/libdrm/libdrm-2.4.87.tar.bz2.sig https://dri.freedesktop.org/libdrm/libdrm-2.4.87.tar.gz MD5: 8ec8c39c3a42db9a9a8d8a8c67290883 libdrm-2.4.87.tar.gz SHA1: 0bd17d00d1843a1ddca98352abcdebb468dd7384 libdrm-2.4.87.tar.gz SHA256: e813e2e8d7d9f071200317ee6b2ef6e35d4d497a57488ed3d44551e5970fc41a libdrm-2.4.87.tar.gz SHA512: 178541f488c62ca45ffb9bc04c737fa54560df7a845f0cd8b02684da5a51d80c0e96f2327e0b84ad4127501a64d960533b90e0e8c08275bbe6e536c0bd533d09 libdrm-2.4.87.tar.gz PGP: https://dri.freedesktop.org/libdrm/libdrm-2.4.87.tar.gz.sig -BEGIN PGP SIGNATURE- Version: GnuPG v1 iQEcBAEBAgAGBQJZ+JAJAAoJEP3RXVrO8PKxe3kH/11MckJZYJRx5Gnwkb8o2kMx y9omlgbpA9CI8eRZTIOD/XZ/yxRLziPDw8NLFuJwMQvYTiSbv7RY1ESqtFHv5U3y d343Yux85TWyfKrPTc11PdHgK+qRnoLWVPH4Nr0SJLuBX1kqiNe0ws0VSvoNlFxB 7ayGsoF71W4lZIWkEi+U59InilDHEZv+C1xt1CMR1pBegBPZ5BOumEScHkuHH8Fu v7A5q1eMtHbU1kkjuqVoZj/igYFZB3yPN11iByGHgz/N9iXXRMopt8X/AK6yFHjN pzWd/qA/sr3PP35kU4Xo5ENmsEWSxyV86pMX38D2zekLovhPkstLfEz/wuhEC+o= =7DH2 -END PGP SIGNATURE- ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/radeon: deprecate and remove KFD interface
Am 31.10.2017 um 11:44 schrieb Oded Gabbay: Don't have any strong objection, but I just want to ask if current users can just move to using amdgpu on KV and their current usermode stack will work as usual. Yes, I think so. btw, are there any "current users" that you are aware of ? Not the slightest idea, but from Felix comments at least the current user mode stack + old kernel won't work. Regards, Christian. On Mon, Oct 30, 2017 at 3:16 PM, Christian König wrote: From: Christian König To quote Felix: "For testing KV with current user mode stack, please use amdgpu. I don't expect this to work with radeon and I'm not planning to spend any effort on making radeon work with a current user mode stack." Only compile tested, but should be straight forward. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdkfd/Kconfig | 2 +- drivers/gpu/drm/radeon/Makefile | 3 +- drivers/gpu/drm/radeon/cik.c| 14 +- drivers/gpu/drm/radeon/cikd.h | 2 - drivers/gpu/drm/radeon/radeon.h | 3 - drivers/gpu/drm/radeon/radeon_drv.c | 10 - drivers/gpu/drm/radeon/radeon_kfd.c | 901 drivers/gpu/drm/radeon/radeon_kfd.h | 47 -- drivers/gpu/drm/radeon/radeon_kms.c | 7 - 9 files changed, 4 insertions(+), 985 deletions(-) delete mode 100644 drivers/gpu/drm/radeon/radeon_kfd.c delete mode 100644 drivers/gpu/drm/radeon/radeon_kfd.h diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index e13c67c..bc5a294 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -4,6 +4,6 @@ config HSA_AMD tristate "HSA kernel driver for AMD GPU devices" - depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64 + depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64 help Enable this if you want to use HSA features on AMD GPU devices. diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index be16c63..cf3e598 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -102,8 +102,7 @@ radeon-y += \ radeon-y += \ radeon_vce.o \ vce_v1_0.o \ - vce_v2_0.o \ - radeon_kfd.o + vce_v2_0.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o radeon-$(CONFIG_ACPI) += radeon_acpi.o diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 3cb6c55..898f9a0 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -33,7 +33,6 @@ #include "cik_blit_shaders.h" #include "radeon_ucode.h" #include "clearstate_ci.h" -#include "radeon_kfd.h" #define SH_MEM_CONFIG_GFX_DEFAULT \ ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) @@ -5684,10 +5683,9 @@ int cik_vm_init(struct radeon_device *rdev) /* * number of VMs * VMID 0 is reserved for System -* radeon graphics/compute will use VMIDs 1-7 -* amdkfd will use VMIDs 8-15 +* radeon graphics/compute will use VMIDs 1-15 */ - rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS; + rdev->vm_manager.nvm = 16; /* base offset of vram pages */ if (rdev->flags & RADEON_IS_IGP) { u64 tmp = RREG32(MC_VM_FB_OFFSET); @@ -7589,9 +7587,6 @@ int cik_irq_process(struct radeon_device *rdev) /* wptr/rptr are in bytes! */ ring_index = rptr / 4; - radeon_kfd_interrupt(rdev, - (const void *) &rdev->ih.ring[ring_index]); - src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfff; ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; @@ -8486,10 +8481,6 @@ static int cik_startup(struct radeon_device *rdev) if (r) return r; - r = radeon_kfd_resume(rdev); - if (r) - return r; - return 0; } @@ -8538,7 +8529,6 @@ int cik_resume(struct radeon_device *rdev) */ int cik_suspend(struct radeon_device *rdev) { - radeon_kfd_suspend(rdev); radeon_pm_suspend(rdev); radeon_audio_fini(rdev); radeon_vm_manager_fini(rdev); diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index e210154..cda16fc 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -30,8 +30,6 @@ #define CIK_RB_BITMAP_WIDTH_PER_SH 2 #define HAWAII_RB_BITMAP_WIDTH_PER_SH 4 -#define RADEON_NUM_OF_VMIDS8 - /* DIDT IND registers */ #define DIDT_SQ_CTRL0 0x0 # define DIDT_CTRL_EN (1 << 0) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ec63bc5..d94741b 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2456,9 +2456
[ANNOUNCE] libdrm 2.4.86
-BEGIN PGP SIGNED MESSAGE- Hash: SHA1 libdrm 2.4.86 has been released. Andrey Grodzovsky (2): amdgpu: Add wrappers for AMDGPU_VM IOCTL. amdgpu: Add VMID reservation per GPU context test. Dave Airlie (2): drm: sync drm headers from drm-next. drm/syncobj: fix some whitespace issues Marek Olšák (1): configure.ac: bump version for release git tag: libdrm-2.4.86 https://dri.freedesktop.org/libdrm/libdrm-2.4.86.tar.bz2 MD5: 8dabf172c9695c24d000cbddfa849a82 libdrm-2.4.86.tar.bz2 SHA1: b2c428326843dfaa5fb88899b8dfaa43b792dc70 libdrm-2.4.86.tar.bz2 SHA256: 4b010002ce158b4b6537ddb5a0f94a628be19727a71f1ab556a276829989072b libdrm-2.4.86.tar.bz2 SHA512: 7d07b66db104728d013830da96fff678580c7505f1292cc7713b99f1e34e80276bcdc40a4377d67cb7ea73708e3fba29ba4062bd68d84845943b68090b9d7a01 libdrm-2.4.86.tar.bz2 PGP: https://dri.freedesktop.org/libdrm/libdrm-2.4.86.tar.bz2.sig https://dri.freedesktop.org/libdrm/libdrm-2.4.86.tar.gz MD5: 23ffd64496823a673b290268953da034 libdrm-2.4.86.tar.gz SHA1: f7d277ef964b767f94215bf3f464e83fcdb742bd libdrm-2.4.86.tar.gz SHA256: 90fca042dd5c619fff2771ab634c69010f25c582071519aa284860758fac2963 libdrm-2.4.86.tar.gz SHA512: c22f748ecdc7910c0ebac618a936af3b47f3f5f0179be823a72920a1c1d47d52e3e832e0de5d37758d723a5ab7f0b5ef39dcb3ed7158904f9ca9f70509e7ee8b libdrm-2.4.86.tar.gz PGP: https://dri.freedesktop.org/libdrm/libdrm-2.4.86.tar.gz.sig -BEGIN PGP SIGNATURE- Version: GnuPG v1 iQEcBAEBAgAGBQJZ+IdmAAoJEP3RXVrO8PKxrG8IAKRCbKenZyeQlyYVav5xTfvJ y++EOE7HqJ3K8LJnIwiQqjY6M67B8UVXrY536GCc5laMLGV7789wAbMpLVfGYS3w /uUpeEf1NmIehnfQHQvHUT6JxeuHamsVFJ7Svfbfpx2JJ7pFoH4mIynxGlPjjv97 BBJg6suK8orD0yL/73A2x1FXDLoKHE4PGuCTZecpnb4y/EVdatFFE7wlb0uxGRrs ZQroSptRmn3kc/KCIhoLYPrhXvRgiNMgvyL/orsL0tL5NlMBjJHOFO+WhjHZBvBC wGTEwJRnRoJkTklC5O3G/wXZMjQHi7ExOF+7yIB/4EjYuNeEPYpYHzJjnHfrvto= =Kd+P -END PGP SIGNATURE- ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] drm/amdgpu:cleanup deprecated gpu reset logic
trigger gpu reset/recovery from illegle instruction IRQ is deprecated long time ago, we already switch to recover gpu by TDR triggering. now please set lockup_timeout to non-zero value in driver loading to enable TDR. Change-Id: I45a576a97fd9859e1098e785ce857c2cf5adfba5 Signed-off-by: Monk Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 21 - drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 1 - drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 3 --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 1 - drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 1 - drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 1 - drivers/gpu/drm/amd/amdgpu/si_dma.c | 1 - 11 files changed, 36 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6e89be5..b3453e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1459,7 +1459,6 @@ struct amdgpu_device { boolshutdown; boolneed_dma32; boolaccel_working; - struct work_struct reset_work; struct notifier_block acpi_nb; struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index c340774..989b530 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -73,23 +73,6 @@ static void amdgpu_hotplug_work_func(struct work_struct *work) drm_helper_hpd_irq_event(dev); } -/** - * amdgpu_irq_reset_work_func - execute gpu reset - * - * @work: work struct - * - * Execute scheduled gpu reset (cayman+). - * This function is called when the irq handler - * thinks we need a gpu reset. - */ -static void amdgpu_irq_reset_work_func(struct work_struct *work) -{ - struct amdgpu_device *adev = container_of(work, struct amdgpu_device, - reset_work); - - if (!amdgpu_sriov_vf(adev)) - amdgpu_gpu_recover(adev, NULL); -} /* Disable *all* interrupts */ static void amdgpu_irq_disable_all(struct amdgpu_device *adev) @@ -251,14 +234,11 @@ int amdgpu_irq_init(struct amdgpu_device *adev) amdgpu_hotplug_work_func); } - INIT_WORK(&adev->reset_work, amdgpu_irq_reset_work_func); - adev->irq.installed = true; r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); if (r) { adev->irq.installed = false; flush_work(&adev->hotplug_work); - cancel_work_sync(&adev->reset_work); return r; } @@ -283,7 +263,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) if (adev->irq.msi_enabled) pci_disable_msi(adev->pdev); flush_work(&adev->hotplug_work); - cancel_work_sync(&adev->reset_work); } for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) { diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index ed26dcb..c8d9bc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1227,7 +1227,6 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal instruction in SDMA command stream\n"); - schedule_work(&adev->reset_work); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 9430d48..25b32b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3443,7 +3443,6 @@ static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal register access in command stream\n"); - schedule_work(&adev->reset_work); return 0; } @@ -3452,7 +3451,6 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal instruction in command stream\n"); - schedule_work(&adev->reset_work); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 3c2b15a..b0e127d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -5020,7 +5020,6 @@ static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) {
[PATCH 1/2] drm/amdgpu:add fw-vram-usage for atomfirmware
otherwise PF & VF exchange is broken Change-Id: Icbb44e640ba1c6e61914cbd234e92de001496195 Signed-off-by: Monk Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 26 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index f9ffe8e..455ad63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -71,19 +71,37 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) struct atom_context *ctx = adev->mode_info.atom_context; int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_usagebyfirmware); + struct vram_usagebyfirmware_v2_1 * firmware_usage; + uint32_t start_addr, size; uint16_t data_offset; int usage_bytes = 0; if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) { - struct vram_usagebyfirmware_v2_1 *firmware_usage = - (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset); + printk("data_offset=%x\n", data_offset); + firmware_usage = (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset); - DRM_DEBUG("atom firmware requested %08x %dkb fw %dkb drv\n", + DRM_INFO("atom firmware requested %08x %dkb fw %dkb drv\n", le32_to_cpu(firmware_usage->start_address_in_kb), le16_to_cpu(firmware_usage->used_by_firmware_in_kb), le16_to_cpu(firmware_usage->used_by_driver_in_kb)); - usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) * 1024; + start_addr = le32_to_cpu(firmware_usage->start_address_in_kb); + size = le16_to_cpu(firmware_usage->used_by_firmware_in_kb); + + printk("start_addr = %x, size=%x\n", start_addr, size); + + if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) == + (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION << + ATOM_VRAM_OPERATION_FLAGS_SHIFT)) { + /* Firmware request VRAM reservation for SR-IOV */ + adev->fw_vram_usage.start_offset = (start_addr & + (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; + adev->fw_vram_usage.size = size << 10; + /* Use the default scratch size */ + usage_bytes = 0; + } else { + usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) << 10; + } } ctx->scratch_size_bytes = 0; if (usage_bytes == 0) -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 1/2] drm/amdgpu: return error when sriov access requests get timeout
>@@ -450,7 +450,7 @@ static int xgpu_vi_send_access_requests(struct >amdgpu_device *adev, > pr_err("Doesn't get ack from pf, continue\n"); Why don't return error in this place? > } > >- return 0; >+ return r; >} static int xgpu_vi_request_reset(struct amdgpu_device *adev) -- 2.9.5 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/radeon: deprecate and remove KFD interface
Don't have any strong objection, but I just want to ask if current users can just move to using amdgpu on KV and their current usermode stack will work as usual. btw, are there any "current users" that you are aware of ? On Mon, Oct 30, 2017 at 3:16 PM, Christian König wrote: > From: Christian König > > To quote Felix: "For testing KV with current user mode stack, please use > amdgpu. I don't expect this to work with radeon and I'm not planning to spend > any effort on making radeon work with a current user mode stack." > > Only compile tested, but should be straight forward. > > Signed-off-by: Christian König > --- > drivers/gpu/drm/amd/amdkfd/Kconfig | 2 +- > drivers/gpu/drm/radeon/Makefile | 3 +- > drivers/gpu/drm/radeon/cik.c| 14 +- > drivers/gpu/drm/radeon/cikd.h | 2 - > drivers/gpu/drm/radeon/radeon.h | 3 - > drivers/gpu/drm/radeon/radeon_drv.c | 10 - > drivers/gpu/drm/radeon/radeon_kfd.c | 901 > > drivers/gpu/drm/radeon/radeon_kfd.h | 47 -- > drivers/gpu/drm/radeon/radeon_kms.c | 7 - > 9 files changed, 4 insertions(+), 985 deletions(-) > delete mode 100644 drivers/gpu/drm/radeon/radeon_kfd.c > delete mode 100644 drivers/gpu/drm/radeon/radeon_kfd.h > > diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig > b/drivers/gpu/drm/amd/amdkfd/Kconfig > index e13c67c..bc5a294 100644 > --- a/drivers/gpu/drm/amd/amdkfd/Kconfig > +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig > @@ -4,6 +4,6 @@ > > config HSA_AMD > tristate "HSA kernel driver for AMD GPU devices" > - depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64 > + depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64 > help > Enable this if you want to use HSA features on AMD GPU devices. > diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile > index be16c63..cf3e598 100644 > --- a/drivers/gpu/drm/radeon/Makefile > +++ b/drivers/gpu/drm/radeon/Makefile > @@ -102,8 +102,7 @@ radeon-y += \ > radeon-y += \ > radeon_vce.o \ > vce_v1_0.o \ > - vce_v2_0.o \ > - radeon_kfd.o > + vce_v2_0.o > > radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o > radeon-$(CONFIG_ACPI) += radeon_acpi.o > diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c > index 3cb6c55..898f9a0 100644 > --- a/drivers/gpu/drm/radeon/cik.c > +++ b/drivers/gpu/drm/radeon/cik.c > @@ -33,7 +33,6 @@ > #include "cik_blit_shaders.h" > #include "radeon_ucode.h" > #include "clearstate_ci.h" > -#include "radeon_kfd.h" > > #define SH_MEM_CONFIG_GFX_DEFAULT \ > ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) > @@ -5684,10 +5683,9 @@ int cik_vm_init(struct radeon_device *rdev) > /* > * number of VMs > * VMID 0 is reserved for System > -* radeon graphics/compute will use VMIDs 1-7 > -* amdkfd will use VMIDs 8-15 > +* radeon graphics/compute will use VMIDs 1-15 > */ > - rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS; > + rdev->vm_manager.nvm = 16; > /* base offset of vram pages */ > if (rdev->flags & RADEON_IS_IGP) { > u64 tmp = RREG32(MC_VM_FB_OFFSET); > @@ -7589,9 +7587,6 @@ int cik_irq_process(struct radeon_device *rdev) > /* wptr/rptr are in bytes! */ > ring_index = rptr / 4; > > - radeon_kfd_interrupt(rdev, > - (const void *) &rdev->ih.ring[ring_index]); > - > src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; > src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & > 0xfff; > ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; > @@ -8486,10 +8481,6 @@ static int cik_startup(struct radeon_device *rdev) > if (r) > return r; > > - r = radeon_kfd_resume(rdev); > - if (r) > - return r; > - > return 0; > } > > @@ -8538,7 +8529,6 @@ int cik_resume(struct radeon_device *rdev) > */ > int cik_suspend(struct radeon_device *rdev) > { > - radeon_kfd_suspend(rdev); > radeon_pm_suspend(rdev); > radeon_audio_fini(rdev); > radeon_vm_manager_fini(rdev); > diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h > index e210154..cda16fc 100644 > --- a/drivers/gpu/drm/radeon/cikd.h > +++ b/drivers/gpu/drm/radeon/cikd.h > @@ -30,8 +30,6 @@ > #define CIK_RB_BITMAP_WIDTH_PER_SH 2 > #define HAWAII_RB_BITMAP_WIDTH_PER_SH 4 > > -#define RADEON_NUM_OF_VMIDS8 > - > /* DIDT IND registers */ > #define DIDT_SQ_CTRL0 0x0 > # define DIDT_CTRL_EN (1 << 0) > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index ec63bc5..d94741b 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -2456,9 +2456,6 @@ struct radeon_
Re: [PATCH] drm/amdgpu: release exclusive mode after hw_init if no kfd (v2)
I sent you an answer in the other email thread On Tue, Oct 31, 2017 at 9:06 AM, Ding, Pixel wrote: > Hi Oded, > > Would you please help reviewing the V2 patch? > > — > Sincerely Yours, > Pixel > > > > > > > > > On 31/10/2017, 9:47 AM, "Pixel Ding" wrote: > >>From: pding >> >>Move kfd probe prior to device init. Release exclusive mode >>after hw_init if kfd is not enabled. >> >>v2: >> - pass pdev param >> >>Signed-off-by: pding >>--- >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 5 +++-- >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 3 ++- >> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 5 +++-- >> 4 files changed, 11 insertions(+), 5 deletions(-) >> >>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >>index c70cda0..f0f5d0e 100644 >>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >>@@ -68,7 +68,8 @@ void amdgpu_amdkfd_fini(void) >> } >> } >> >>-void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) >>+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev, >>+ struct pci_dev *pdev) >> { >> const struct kfd2kgd_calls *kfd2kgd; >> >>@@ -90,7 +91,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) >> } >> >> adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, >>- adev->pdev, kfd2kgd); >>+ pdev, kfd2kgd); >> } >> >> void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) >>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >>index 8d689ab..707c892 100644 >>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >>@@ -44,7 +44,8 @@ void amdgpu_amdkfd_suspend(struct amdgpu_device *adev); >> int amdgpu_amdkfd_resume(struct amdgpu_device *adev); >> void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, >> const void *ih_ring_entry); >>-void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); >>+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev, >>+ struct pci_dev *pdev); >> void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); >> void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); >> >>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>index 2ff2c54..daa2098 100644 >>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>@@ -1716,6 +1716,9 @@ static int amdgpu_init(struct amdgpu_device *adev) >> adev->ip_blocks[i].status.hw = true; >> } >> >>+ if (amdgpu_sriov_vf(adev) && !adev->kfd) >>+ amdgpu_virt_release_full_gpu(adev, true); >>+ >> return 0; >> } >> >>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >>b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >>index 3e9760d..f872052 100644 >>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >>@@ -138,6 +138,8 @@ int amdgpu_driver_load_kms(struct drm_device *dev, >>unsigned long flags) >> !pci_is_thunderbolt_attached(dev->pdev)) >> flags |= AMD_IS_PX; >> >>+ amdgpu_amdkfd_device_probe(adev, dev->pdev); >>+ >> /* amdgpu_device_init should report only fatal error >>* like memory allocation failure or iomapping failure, >>* or memory manager initialization failure, it must >>@@ -170,7 +172,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, >>unsigned long flags) >> "Error during ACPI methods call\n"); >> } >> >>- amdgpu_amdkfd_device_probe(adev); >> amdgpu_amdkfd_device_init(adev); >> >> if (amdgpu_device_is_px(dev)) { >>@@ -182,7 +183,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, >>unsigned long flags) >> pm_runtime_put_autosuspend(dev->dev); >> } >> >>- if (amdgpu_sriov_vf(adev)) >>+ if (amdgpu_sriov_vf(adev) && adev->kfd) >> amdgpu_virt_release_full_gpu(adev, true); >> >> out: >>-- >>2.9.5 >> ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: release exclusive mode after hw_init if no kfd
I would pass only pdev to kgd2kfd_probe, instead of adev. Then, I would initialize adev inside kgd2kfd_device_init That way, you can call kgd2kfd_probe before device_init of amdgpu and you can know if amdkfd is supposed to handle this device. Does that make sense ? On Mon, Oct 30, 2017 at 11:13 AM, Ding, Pixel wrote: > Another option is adding a function with the same logics as top half probe to > determine if KFD is enabled or not, like amdgpu_amdkfd_enabled(). > > I think it’s okay to pass pdev separately. The KFD probe implementation also > has pdev parameter, it doesn’t retrieve that from adev, while the middle > layer does this. > > Which one do you prefer? > > — > Sincerely Yours, > Pixel > > > > > > > > On 30/10/2017, 4:52 PM, "Oded Gabbay" wrote: > >>Except from pdev, kfd doesn't access any other fields in adev, so >>passing pdev as a different parameter seems fine. >>The problem with that approach is we need to remember for the future >>not to access other adev fields, and that seems risky and not correct >> >> >> >> >>On Mon, Oct 30, 2017 at 10:30 AM, Ding, Pixel wrote: >>> Get your point. I don’t know why the test is passed however will revise >>> later. >>> >>> I definitely want the know if KFD is enabled before device init. Any >>> suggestion? Or do you mind if the pdev is passed to probe in other way? >>> >>> — >>> Sincerely Yours, >>> Pixel >>> >>> >>> >>> >>> >>> >>> >>> >>> On 30/10/2017, 4:20 PM, "Oded Gabbay" wrote: >>> On Mon, Oct 30, 2017 at 9:57 AM, Pixel Ding wrote: > From: pding > > Move kfd probe prior to device init. Release exclusive mode > after hw_init if kfd is not enabled. > > Signed-off-by: pding > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 5 +++-- > 2 files changed, 6 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 400dfaa..e46ec51 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -1716,6 +1716,9 @@ static int amdgpu_init(struct amdgpu_device *adev) > adev->ip_blocks[i].status.hw = true; > } > > + if (amdgpu_sriov_vf(adev) && !adev->kfd) > + amdgpu_virt_release_full_gpu(adev, true); > + > return 0; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > index 3e9760d..e91907c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > @@ -138,6 +138,8 @@ int amdgpu_driver_load_kms(struct drm_device *dev, > unsigned long flags) > !pci_is_thunderbolt_attached(dev->pdev)) > flags |= AMD_IS_PX; > > + amdgpu_amdkfd_device_probe(adev); > + > /* amdgpu_device_init should report only fatal error > * like memory allocation failure or iomapping failure, > * or memory manager initialization failure, it must > @@ -170,7 +172,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, > unsigned long flags) > "Error during ACPI methods call\n"); > } > > - amdgpu_amdkfd_device_probe(adev); > amdgpu_amdkfd_device_init(adev); > > if (amdgpu_device_is_px(dev)) { > @@ -182,7 +183,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, > unsigned long flags) > pm_runtime_put_autosuspend(dev->dev); > } > > - if (amdgpu_sriov_vf(adev)) > + if (amdgpu_sriov_vf(adev) && adev->kfd) > amdgpu_virt_release_full_gpu(adev, true); > > out: > -- > 2.9.5 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx The amdkfd probe function uses the pdev field inside adev. That field is initialized in device init, so you can't call amdkfd probe before that function. Oded ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 1/2] drm/amdgpu: return error when sriov access requests get timeout (v2)
Reviewed-By: Xiangliang Yu -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Pixel Ding Sent: Tuesday, October 31, 2017 4:17 PM To: amd-gfx@lists.freedesktop.org; Yu, Xiangliang Cc: Ding, Pixel Subject: [PATCH 1/2] drm/amdgpu: return error when sriov access requests get timeout (v2) From: pding v2: - readable Reported-by: Sun Gary Signed-off-by: pding --- drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 818ec0f..2b435c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -446,8 +446,10 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev, request == IDH_REQ_GPU_FINI_ACCESS || request == IDH_REQ_GPU_RESET_ACCESS) { r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); - if (r) - pr_err("Doesn't get ack from pf, continue\n"); + if (r) { + pr_err("Doesn't get ack from pf, give up\n"); + return r; + } } return 0; -- 2.9.5 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 2/2] drm/amdgpu: retry init if exclusive mode request is failed
RB-by: Xiangliang Yu -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Pixel Ding Sent: Tuesday, October 31, 2017 3:11 PM To: amd-gfx@lists.freedesktop.org Cc: Ding, Pixel Subject: [PATCH 2/2] drm/amdgpu: retry init if exclusive mode request is failed From: pding This is caused of that hypervisor fails to handle request, one known issue is MMIO unblocking timeout. In theory we can retry init here. Signed-off-by: pding --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index daa2098..ef01aa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1625,7 +1625,7 @@ static int amdgpu_early_init(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_request_full_gpu(adev, true); if (r) - return r; + return -EAGAIN; } for (i = 0; i < adev->num_ip_blocks; i++) { -- 2.9.5 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] dma-buf: keep only not signaled fence in reservation_object_add_shared_replace v2
On 2017年10月31日 16:55, Christian König wrote: Am 31.10.2017 um 09:51 schrieb Chunming Zhou: On 2017年10月31日 16:43, Christian König wrote: The amdgpu issue to also need signaled fences in the reservation objects should be fixed by now. Optimize the list by keeping only the not signaled yet fences around. v2: temporary put the signaled fences at the end of the new container Signed-off-by: Christian König --- drivers/dma-buf/reservation.c | 36 ++-- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index b44d9d7db347..6fc794576997 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -145,8 +145,8 @@ reservation_object_add_shared_replace(struct reservation_object *obj, struct reservation_object_list *fobj, struct dma_fence *fence) { - unsigned i; struct dma_fence *old_fence = NULL; + unsigned i, j, k; dma_fence_get(fence); @@ -162,9 +162,7 @@ reservation_object_add_shared_replace(struct reservation_object *obj, * references from the old struct are carried over to * the new. */ - fobj->shared_count = old->shared_count; - - for (i = 0; i < old->shared_count; ++i) { + for (i = 0, j = 0, k = fobj->shared_max; i < old->shared_count; ++i) { struct dma_fence *check; check = rcu_dereference_protected(old->shared[i], @@ -172,10 +170,18 @@ reservation_object_add_shared_replace(struct reservation_object *obj, if (!old_fence && check->context == fence->context) { old_fence = check; - RCU_INIT_POINTER(fobj->shared[i], fence); - } else - RCU_INIT_POINTER(fobj->shared[i], check); + RCU_INIT_POINTER(fobj->shared[j++], fence); + } else if (!dma_fence_is_signaled(check)) { + RCU_INIT_POINTER(fobj->shared[j++], check); + } else { + /* + * Temporary save the signaled fences at the end of the + * new container + */ + RCU_INIT_POINTER(fobj->shared[--k], check); + } } + fobj->shared_count = j; if (!old_fence) { RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); Seems you still don't address here, how do you sure fobj->shared[fobj->shared_count] is null? if not NULL, the old one will be leak. I've put them at the end of the container, see above "k = fobj->shared_max". Since shared_max >> shared_count (at least twice as large in this situation) we should be on the save side. Ah, oops, Reviewed-by: Chunming Zhou for the series. Thanks, David Zhou Regards, Christian. Regards, David Zhou fobj->shared_count++; @@ -192,10 +198,20 @@ reservation_object_add_shared_replace(struct reservation_object *obj, write_seqcount_end(&obj->seq); preempt_enable(); - if (old) - kfree_rcu(old, rcu); - dma_fence_put(old_fence); + + if (!old) + return; + + /* Drop the references to the signaled fences */ + for (i = k; i < fobj->shared_max; ++i) { + struct dma_fence *f; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(obj)); + dma_fence_put(f); + } + kfree_rcu(old, rcu); } /** ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] dma-buf: keep only not signaled fence in reservation_object_add_shared_replace v2
Am 31.10.2017 um 09:51 schrieb Chunming Zhou: On 2017年10月31日 16:43, Christian König wrote: The amdgpu issue to also need signaled fences in the reservation objects should be fixed by now. Optimize the list by keeping only the not signaled yet fences around. v2: temporary put the signaled fences at the end of the new container Signed-off-by: Christian König --- drivers/dma-buf/reservation.c | 36 ++-- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index b44d9d7db347..6fc794576997 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -145,8 +145,8 @@ reservation_object_add_shared_replace(struct reservation_object *obj, struct reservation_object_list *fobj, struct dma_fence *fence) { - unsigned i; struct dma_fence *old_fence = NULL; + unsigned i, j, k; dma_fence_get(fence); @@ -162,9 +162,7 @@ reservation_object_add_shared_replace(struct reservation_object *obj, * references from the old struct are carried over to * the new. */ - fobj->shared_count = old->shared_count; - - for (i = 0; i < old->shared_count; ++i) { + for (i = 0, j = 0, k = fobj->shared_max; i < old->shared_count; ++i) { struct dma_fence *check; check = rcu_dereference_protected(old->shared[i], @@ -172,10 +170,18 @@ reservation_object_add_shared_replace(struct reservation_object *obj, if (!old_fence && check->context == fence->context) { old_fence = check; - RCU_INIT_POINTER(fobj->shared[i], fence); - } else - RCU_INIT_POINTER(fobj->shared[i], check); + RCU_INIT_POINTER(fobj->shared[j++], fence); + } else if (!dma_fence_is_signaled(check)) { + RCU_INIT_POINTER(fobj->shared[j++], check); + } else { + /* + * Temporary save the signaled fences at the end of the + * new container + */ + RCU_INIT_POINTER(fobj->shared[--k], check); + } } + fobj->shared_count = j; if (!old_fence) { RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); Seems you still don't address here, how do you sure fobj->shared[fobj->shared_count] is null? if not NULL, the old one will be leak. I've put them at the end of the container, see above "k = fobj->shared_max". Since shared_max >> shared_count (at least twice as large in this situation) we should be on the save side. Regards, Christian. Regards, David Zhou fobj->shared_count++; @@ -192,10 +198,20 @@ reservation_object_add_shared_replace(struct reservation_object *obj, write_seqcount_end(&obj->seq); preempt_enable(); - if (old) - kfree_rcu(old, rcu); - dma_fence_put(old_fence); + + if (!old) + return; + + /* Drop the references to the signaled fences */ + for (i = k; i < fobj->shared_max; ++i) { + struct dma_fence *f; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(obj)); + dma_fence_put(f); + } + kfree_rcu(old, rcu); } /** ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] dma-buf: keep only not signaled fence in reservation_object_add_shared_replace v2
On 2017年10月31日 16:43, Christian König wrote: The amdgpu issue to also need signaled fences in the reservation objects should be fixed by now. Optimize the list by keeping only the not signaled yet fences around. v2: temporary put the signaled fences at the end of the new container Signed-off-by: Christian König --- drivers/dma-buf/reservation.c | 36 ++-- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index b44d9d7db347..6fc794576997 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -145,8 +145,8 @@ reservation_object_add_shared_replace(struct reservation_object *obj, struct reservation_object_list *fobj, struct dma_fence *fence) { - unsigned i; struct dma_fence *old_fence = NULL; + unsigned i, j, k; dma_fence_get(fence); @@ -162,9 +162,7 @@ reservation_object_add_shared_replace(struct reservation_object *obj, * references from the old struct are carried over to * the new. */ - fobj->shared_count = old->shared_count; - - for (i = 0; i < old->shared_count; ++i) { + for (i = 0, j = 0, k = fobj->shared_max; i < old->shared_count; ++i) { struct dma_fence *check; check = rcu_dereference_protected(old->shared[i], @@ -172,10 +170,18 @@ reservation_object_add_shared_replace(struct reservation_object *obj, if (!old_fence && check->context == fence->context) { old_fence = check; - RCU_INIT_POINTER(fobj->shared[i], fence); - } else - RCU_INIT_POINTER(fobj->shared[i], check); + RCU_INIT_POINTER(fobj->shared[j++], fence); + } else if (!dma_fence_is_signaled(check)) { + RCU_INIT_POINTER(fobj->shared[j++], check); + } else { + /* +* Temporary save the signaled fences at the end of the +* new container +*/ + RCU_INIT_POINTER(fobj->shared[--k], check); + } } + fobj->shared_count = j; if (!old_fence) { RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); Seems you still don't address here, how do you sure fobj->shared[fobj->shared_count] is null? if not NULL, the old one will be leak. Regards, David Zhou fobj->shared_count++; @@ -192,10 +198,20 @@ reservation_object_add_shared_replace(struct reservation_object *obj, write_seqcount_end(&obj->seq); preempt_enable(); - if (old) - kfree_rcu(old, rcu); - dma_fence_put(old_fence); + + if (!old) + return; + + /* Drop the references to the signaled fences */ + for (i = k; i < fobj->shared_max; ++i) { + struct dma_fence *f; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(obj)); + dma_fence_put(f); + } + kfree_rcu(old, rcu); } /** ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] dma-buf: keep only not signaled fence in reservation_object_add_shared_replace
On 31/10/17 09:42 AM, Christian König wrote: > Looks like v2 never made it to the list. I've just send the V2 patches > once more. FWIW, I received your v2 patches yesterday, and now twice today. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix error handling in amdgpu_bo_do_create
On 31/10/17 09:37 AM, Christian König wrote: > From: Christian König > > The bo structure is freed up in case of an error, so we can't do any > accounting if that happens. > > Signed-off-by: Christian König > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > index e44b880eefdd..ff6f842655d1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > @@ -369,6 +369,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, > r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, >&bo->placement, page_align, !kernel, NULL, >acc_size, sg, resv, &amdgpu_ttm_bo_destroy); > + if (unlikely(r != 0)) > + return r; > + > bytes_moved = atomic64_read(&adev->num_bytes_moved) - > initial_bytes_moved; > if (adev->mc.visible_vram_size < adev->mc.real_vram_size && > @@ -378,9 +381,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, > else > amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0); > > - if (unlikely(r != 0)) > - return r; > - > if (kernel) > bo->tbo.priority = 1; > > Hmm. If it's at least theoretically possible that ttm_bo_init_reserved moves some BOs before returning an error, we should instead make the accounting safe WRT bo having been freed. But assuming it's not possible, Reviewed-by: Michel Dänzer -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] dma-buf: try to replace a signaled fence in reservation_object_add_shared_inplace
The amdgpu issue to also need signaled fences in the reservation objects should be fixed by now. Optimize the handling by replacing a signaled fence when adding a new shared one. Signed-off-by: Christian König --- drivers/dma-buf/reservation.c | 18 +++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index 6fc794576997..a3928ce9f311 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -104,7 +104,8 @@ reservation_object_add_shared_inplace(struct reservation_object *obj, struct reservation_object_list *fobj, struct dma_fence *fence) { - u32 i; + struct dma_fence *signaled = NULL; + u32 i, signaled_idx; dma_fence_get(fence); @@ -126,17 +127,28 @@ reservation_object_add_shared_inplace(struct reservation_object *obj, dma_fence_put(old_fence); return; } + + if (!signaled && dma_fence_is_signaled(old_fence)) { + signaled = old_fence; + signaled_idx = i; + } } /* * memory barrier is added by write_seqcount_begin, * fobj->shared_count is protected by this lock too */ - RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); - fobj->shared_count++; + if (signaled) { + RCU_INIT_POINTER(fobj->shared[signaled_idx], fence); + } else { + RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); + fobj->shared_count++; + } write_seqcount_end(&obj->seq); preempt_enable(); + + dma_fence_put(signaled); } static void -- 2.11.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] dma-buf: keep only not signaled fence in reservation_object_add_shared_replace v2
The amdgpu issue to also need signaled fences in the reservation objects should be fixed by now. Optimize the list by keeping only the not signaled yet fences around. v2: temporary put the signaled fences at the end of the new container Signed-off-by: Christian König --- drivers/dma-buf/reservation.c | 36 ++-- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index b44d9d7db347..6fc794576997 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -145,8 +145,8 @@ reservation_object_add_shared_replace(struct reservation_object *obj, struct reservation_object_list *fobj, struct dma_fence *fence) { - unsigned i; struct dma_fence *old_fence = NULL; + unsigned i, j, k; dma_fence_get(fence); @@ -162,9 +162,7 @@ reservation_object_add_shared_replace(struct reservation_object *obj, * references from the old struct are carried over to * the new. */ - fobj->shared_count = old->shared_count; - - for (i = 0; i < old->shared_count; ++i) { + for (i = 0, j = 0, k = fobj->shared_max; i < old->shared_count; ++i) { struct dma_fence *check; check = rcu_dereference_protected(old->shared[i], @@ -172,10 +170,18 @@ reservation_object_add_shared_replace(struct reservation_object *obj, if (!old_fence && check->context == fence->context) { old_fence = check; - RCU_INIT_POINTER(fobj->shared[i], fence); - } else - RCU_INIT_POINTER(fobj->shared[i], check); + RCU_INIT_POINTER(fobj->shared[j++], fence); + } else if (!dma_fence_is_signaled(check)) { + RCU_INIT_POINTER(fobj->shared[j++], check); + } else { + /* +* Temporary save the signaled fences at the end of the +* new container +*/ + RCU_INIT_POINTER(fobj->shared[--k], check); + } } + fobj->shared_count = j; if (!old_fence) { RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); fobj->shared_count++; @@ -192,10 +198,20 @@ reservation_object_add_shared_replace(struct reservation_object *obj, write_seqcount_end(&obj->seq); preempt_enable(); - if (old) - kfree_rcu(old, rcu); - dma_fence_put(old_fence); + + if (!old) + return; + + /* Drop the references to the signaled fences */ + for (i = k; i < fobj->shared_max; ++i) { + struct dma_fence *f; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(obj)); + dma_fence_put(f); + } + kfree_rcu(old, rcu); } /** -- 2.11.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] dma-buf: keep only not signaled fence in reservation_object_add_shared_replace
Looks like v2 never made it to the list. I've just send the V2 patches once more. Christian. Am 31.10.2017 um 08:26 schrieb Chunming Zhou: Any update? On 2017年10月25日 15:28, Christian König wrote: Am 25.10.2017 um 08:42 schrieb Chunming Zhou: On 2017年10月24日 21:55, Christian König wrote: From: Christian König The amdgpu issue to also need signaled fences in the reservation objects should be fixed by now. Optimize the list by keeping only the not signaled yet fences around. Signed-off-by: Christian König --- drivers/dma-buf/reservation.c | 31 +-- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index b44d9d7db347..4ede77d1bb31 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -145,8 +145,8 @@ reservation_object_add_shared_replace(struct reservation_object *obj, struct reservation_object_list *fobj, struct dma_fence *fence) { - unsigned i; struct dma_fence *old_fence = NULL; + unsigned i, j, k; dma_fence_get(fence); @@ -162,9 +162,7 @@ reservation_object_add_shared_replace(struct reservation_object *obj, * references from the old struct are carried over to * the new. */ - fobj->shared_count = old->shared_count; - - for (i = 0; i < old->shared_count; ++i) { + for (i = 0, j = 0, k = old->shared_count; i < old->shared_count; ++i) { struct dma_fence *check; check = rcu_dereference_protected(old->shared[i], @@ -172,10 +170,14 @@ reservation_object_add_shared_replace(struct reservation_object *obj, if (!old_fence && check->context == fence->context) { old_fence = check; - RCU_INIT_POINTER(fobj->shared[i], fence); - } else - RCU_INIT_POINTER(fobj->shared[i], check); + RCU_INIT_POINTER(fobj->shared[j++], fence); + } else if (!dma_fence_is_signaled(check)) { + RCU_INIT_POINTER(fobj->shared[j++], check); + } else { + RCU_INIT_POINTER(fobj->shared[--k], check); + } } + fobj->shared_count = j; if (!old_fence) { RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); Here there is a memory leak for signaled fence slots, since you re-order the slots, the j'th slot is storing signaled fence, there is no place to put it when you assign to new one. you cam move it to end or put it here first. Good point, thanks. Going to respin. Regards, Christian. Regards, David Zhou fobj->shared_count++; @@ -192,10 +194,19 @@ reservation_object_add_shared_replace(struct reservation_object *obj, write_seqcount_end(&obj->seq); preempt_enable(); - if (old) - kfree_rcu(old, rcu); - dma_fence_put(old_fence); + + if (!old) + return; + + for (i = fobj->shared_count; i < old->shared_count; ++i) { + struct dma_fence *f; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(obj)); + dma_fence_put(f); + } + kfree_rcu(old, rcu); } /** ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] dma-buf: keep only not signaled fence in reservation_object_add_shared_replace v2
From: Christian König The amdgpu issue to also need signaled fences in the reservation objects should be fixed by now. Optimize the list by keeping only the not signaled yet fences around. v2: temporary put the signaled fences at the end of the new container Signed-off-by: Christian König --- drivers/dma-buf/reservation.c | 36 ++-- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index b44d9d7db347..6fc794576997 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -145,8 +145,8 @@ reservation_object_add_shared_replace(struct reservation_object *obj, struct reservation_object_list *fobj, struct dma_fence *fence) { - unsigned i; struct dma_fence *old_fence = NULL; + unsigned i, j, k; dma_fence_get(fence); @@ -162,9 +162,7 @@ reservation_object_add_shared_replace(struct reservation_object *obj, * references from the old struct are carried over to * the new. */ - fobj->shared_count = old->shared_count; - - for (i = 0; i < old->shared_count; ++i) { + for (i = 0, j = 0, k = fobj->shared_max; i < old->shared_count; ++i) { struct dma_fence *check; check = rcu_dereference_protected(old->shared[i], @@ -172,10 +170,18 @@ reservation_object_add_shared_replace(struct reservation_object *obj, if (!old_fence && check->context == fence->context) { old_fence = check; - RCU_INIT_POINTER(fobj->shared[i], fence); - } else - RCU_INIT_POINTER(fobj->shared[i], check); + RCU_INIT_POINTER(fobj->shared[j++], fence); + } else if (!dma_fence_is_signaled(check)) { + RCU_INIT_POINTER(fobj->shared[j++], check); + } else { + /* +* Temporary save the signaled fences at the end of the +* new container +*/ + RCU_INIT_POINTER(fobj->shared[--k], check); + } } + fobj->shared_count = j; if (!old_fence) { RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); fobj->shared_count++; @@ -192,10 +198,20 @@ reservation_object_add_shared_replace(struct reservation_object *obj, write_seqcount_end(&obj->seq); preempt_enable(); - if (old) - kfree_rcu(old, rcu); - dma_fence_put(old_fence); + + if (!old) + return; + + /* Drop the references to the signaled fences */ + for (i = k; i < fobj->shared_max; ++i) { + struct dma_fence *f; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(obj)); + dma_fence_put(f); + } + kfree_rcu(old, rcu); } /** -- 2.11.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] dma-buf: try to replace a signaled fence in reservation_object_add_shared_inplace
From: Christian König The amdgpu issue to also need signaled fences in the reservation objects should be fixed by now. Optimize the handling by replacing a signaled fence when adding a new shared one. Signed-off-by: Christian König --- drivers/dma-buf/reservation.c | 18 +++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index 6fc794576997..a3928ce9f311 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -104,7 +104,8 @@ reservation_object_add_shared_inplace(struct reservation_object *obj, struct reservation_object_list *fobj, struct dma_fence *fence) { - u32 i; + struct dma_fence *signaled = NULL; + u32 i, signaled_idx; dma_fence_get(fence); @@ -126,17 +127,28 @@ reservation_object_add_shared_inplace(struct reservation_object *obj, dma_fence_put(old_fence); return; } + + if (!signaled && dma_fence_is_signaled(old_fence)) { + signaled = old_fence; + signaled_idx = i; + } } /* * memory barrier is added by write_seqcount_begin, * fobj->shared_count is protected by this lock too */ - RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); - fobj->shared_count++; + if (signaled) { + RCU_INIT_POINTER(fobj->shared[signaled_idx], fence); + } else { + RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); + fobj->shared_count++; + } write_seqcount_end(&obj->seq); preempt_enable(); + + dma_fence_put(signaled); } static void -- 2.11.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: fix error handling in amdgpu_bo_do_create
From: Christian König The bo structure is freed up in case of an error, so we can't do any accounting if that happens. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e44b880eefdd..ff6f842655d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -369,6 +369,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, !kernel, NULL, acc_size, sg, resv, &amdgpu_ttm_bo_destroy); + if (unlikely(r != 0)) + return r; + bytes_moved = atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved; if (adev->mc.visible_vram_size < adev->mc.real_vram_size && @@ -378,9 +381,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, else amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0); - if (unlikely(r != 0)) - return r; - if (kernel) bo->tbo.priority = 1; -- 2.11.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: RE: Can you tell me which driver supports AMD's sriov?
hi,all: Now if the GPU hypervisor driver supporting amdgpu SRIOV ? which patch? which kernel version ? maok...@126.com From: Yu, Xiangliang Date: 2017-09-15 10:15 To: maok...@126.com; amd-gfx Subject: RE: Can you tell me which driver supports AMD's sriov? Need GPU hypervisor driver if supporting amdgpu SRIOV. Right now, we still doesn’t upstream the hypervisor driver to kernel, but we have a plan to upstream it. From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of maok...@126.com Sent: Thursday, September 14, 2017 3:57 PM To: amd-gfx Subject: Can you tell me which driver supports AMD's sriov? hi,all: Which patch begins to support sriov for amd graphics cards ? Graphics cards such as AMD FirePro s7150. I tested a lot of systems that didn't support, such as centos7.2, centos7.3, unbuntu17.4. maok...@126.com ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] drm/amdgpu: return error when sriov access requests get timeout (v2)
From: pding v2: - readable Reported-by: Sun Gary Signed-off-by: pding --- drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 818ec0f..2b435c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -446,8 +446,10 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev, request == IDH_REQ_GPU_FINI_ACCESS || request == IDH_REQ_GPU_RESET_ACCESS) { r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); - if (r) - pr_err("Doesn't get ack from pf, continue\n"); + if (r) { + pr_err("Doesn't get ack from pf, give up\n"); + return r; + } } return 0; -- 2.9.5 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] dma-buf: keep only not signaled fence in reservation_object_add_shared_replace
Any update? On 2017年10月25日 15:28, Christian König wrote: Am 25.10.2017 um 08:42 schrieb Chunming Zhou: On 2017年10月24日 21:55, Christian König wrote: From: Christian König The amdgpu issue to also need signaled fences in the reservation objects should be fixed by now. Optimize the list by keeping only the not signaled yet fences around. Signed-off-by: Christian König --- drivers/dma-buf/reservation.c | 31 +-- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index b44d9d7db347..4ede77d1bb31 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -145,8 +145,8 @@ reservation_object_add_shared_replace(struct reservation_object *obj, struct reservation_object_list *fobj, struct dma_fence *fence) { - unsigned i; struct dma_fence *old_fence = NULL; + unsigned i, j, k; dma_fence_get(fence); @@ -162,9 +162,7 @@ reservation_object_add_shared_replace(struct reservation_object *obj, * references from the old struct are carried over to * the new. */ - fobj->shared_count = old->shared_count; - - for (i = 0; i < old->shared_count; ++i) { + for (i = 0, j = 0, k = old->shared_count; i < old->shared_count; ++i) { struct dma_fence *check; check = rcu_dereference_protected(old->shared[i], @@ -172,10 +170,14 @@ reservation_object_add_shared_replace(struct reservation_object *obj, if (!old_fence && check->context == fence->context) { old_fence = check; - RCU_INIT_POINTER(fobj->shared[i], fence); - } else - RCU_INIT_POINTER(fobj->shared[i], check); + RCU_INIT_POINTER(fobj->shared[j++], fence); + } else if (!dma_fence_is_signaled(check)) { + RCU_INIT_POINTER(fobj->shared[j++], check); + } else { + RCU_INIT_POINTER(fobj->shared[--k], check); + } } + fobj->shared_count = j; if (!old_fence) { RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); Here there is a memory leak for signaled fence slots, since you re-order the slots, the j'th slot is storing signaled fence, there is no place to put it when you assign to new one. you cam move it to end or put it here first. Good point, thanks. Going to respin. Regards, Christian. Regards, David Zhou fobj->shared_count++; @@ -192,10 +194,19 @@ reservation_object_add_shared_replace(struct reservation_object *obj, write_seqcount_end(&obj->seq); preempt_enable(); - if (old) - kfree_rcu(old, rcu); - dma_fence_put(old_fence); + + if (!old) + return; + + for (i = fobj->shared_count; i < old->shared_count; ++i) { + struct dma_fence *f; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(obj)); + dma_fence_put(f); + } + kfree_rcu(old, rcu); } /** ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] drm/amdgpu: retry init if exclusive mode request is failed
From: pding This is caused of that hypervisor fails to handle request, one known issue is MMIO unblocking timeout. In theory we can retry init here. Signed-off-by: pding --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index daa2098..ef01aa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1625,7 +1625,7 @@ static int amdgpu_early_init(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_request_full_gpu(adev, true); if (r) - return r; + return -EAGAIN; } for (i = 0; i < adev->num_ip_blocks; i++) { -- 2.9.5 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] drm/amdgpu: return error when sriov access requests get timeout
From: pding Reported-by: Sun Gary Signed-off-by: pding --- drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 818ec0f..f291fb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -450,7 +450,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev, pr_err("Doesn't get ack from pf, continue\n"); } - return 0; + return r; } static int xgpu_vi_request_reset(struct amdgpu_device *adev) -- 2.9.5 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: release exclusive mode after hw_init if no kfd (v2)
Hi Oded, Would you please help reviewing the V2 patch? — Sincerely Yours, Pixel On 31/10/2017, 9:47 AM, "Pixel Ding" wrote: >From: pding > >Move kfd probe prior to device init. Release exclusive mode >after hw_init if kfd is not enabled. > >v2: > - pass pdev param > >Signed-off-by: pding >--- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 5 +++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 3 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 5 +++-- > 4 files changed, 11 insertions(+), 5 deletions(-) > >diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >index c70cda0..f0f5d0e 100644 >--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >@@ -68,7 +68,8 @@ void amdgpu_amdkfd_fini(void) > } > } > >-void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) >+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev, >+ struct pci_dev *pdev) > { > const struct kfd2kgd_calls *kfd2kgd; > >@@ -90,7 +91,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) > } > > adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, >- adev->pdev, kfd2kgd); >+ pdev, kfd2kgd); > } > > void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) >diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >index 8d689ab..707c892 100644 >--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >@@ -44,7 +44,8 @@ void amdgpu_amdkfd_suspend(struct amdgpu_device *adev); > int amdgpu_amdkfd_resume(struct amdgpu_device *adev); > void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, > const void *ih_ring_entry); >-void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); >+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev, >+ struct pci_dev *pdev); > void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); > void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); > >diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >index 2ff2c54..daa2098 100644 >--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >@@ -1716,6 +1716,9 @@ static int amdgpu_init(struct amdgpu_device *adev) > adev->ip_blocks[i].status.hw = true; > } > >+ if (amdgpu_sriov_vf(adev) && !adev->kfd) >+ amdgpu_virt_release_full_gpu(adev, true); >+ > return 0; > } > >diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >index 3e9760d..f872052 100644 >--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >@@ -138,6 +138,8 @@ int amdgpu_driver_load_kms(struct drm_device *dev, >unsigned long flags) > !pci_is_thunderbolt_attached(dev->pdev)) > flags |= AMD_IS_PX; > >+ amdgpu_amdkfd_device_probe(adev, dev->pdev); >+ > /* amdgpu_device_init should report only fatal error >* like memory allocation failure or iomapping failure, >* or memory manager initialization failure, it must >@@ -170,7 +172,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, >unsigned long flags) > "Error during ACPI methods call\n"); > } > >- amdgpu_amdkfd_device_probe(adev); > amdgpu_amdkfd_device_init(adev); > > if (amdgpu_device_is_px(dev)) { >@@ -182,7 +183,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, >unsigned long flags) > pm_runtime_put_autosuspend(dev->dev); > } > >- if (amdgpu_sriov_vf(adev)) >+ if (amdgpu_sriov_vf(adev) && adev->kfd) > amdgpu_virt_release_full_gpu(adev, true); > > out: >-- >2.9.5 > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx