On Mon, Jan 18, 2021 at 3:40 PM Thomas Zimmermann <tzimmerm...@suse.de> wrote:
>
> Hi
>
> Am 18.01.21 um 14:50 schrieb Christian König:
> > Hi Thomas,
> >
> > this patch unfortunately completely broke amdgpu.
> >
> > See the splat below:
> >
> > [   74.553881] ==================================================================
> > [   74.554060] BUG: KASAN: null-ptr-deref in drm_pci_set_busid+0x38/0x100 [drm]
> > [   74.554393] Read of size 4 at addr 0000000000000038 by task Xorg/1115
> >
> > [   74.554585] CPU: 6 PID: 1115 Comm: Xorg Not tainted 5.11.0-rc2+ #75
> > [   74.554594] Hardware name: System manufacturer System Product
> > Name/PRIME X399-A, BIOS 0808 10/12/2018
> > [   74.554600] Call Trace:
> > [   74.554605]  dump_stack+0x9d/0xce
> > [   74.554616]  ? drm_pci_set_busid+0x38/0x100 [drm]
> > [   74.554787]  kasan_report.cold+0x5d/0xd1
> > [   74.554799]  ? drm_pci_set_busid+0x38/0x100 [drm]
> > [   74.554969]  __asan_load4+0x6b/0x90
> > [   74.554978]  drm_pci_set_busid+0x38/0x100 [drm]
> > [   74.555148]  drm_setversion+0x2ce/0x350 [drm]
> > [   74.555315]  ? drm_is_current_master+0x5d/0x70 [drm]
> > [   74.555481]  drm_ioctl_kernel+0x16d/0x1c0 [drm]
> > [   74.555648]  ? drm_ioctl_permit+0xb0/0xb0 [drm]
> > [   74.555811]  ? drm_setversion+0x350/0x350 [drm]
> > [   74.555973]  ? check_stack_object+0x2d/0xb0
> > [   74.555985]  drm_ioctl+0x363/0x5a0 [drm]
> > [   74.556147]  ? drm_ioctl_permit+0xb0/0xb0 [drm]
> > [   74.556310]  ? drm_ioctl_kernel+0x1c0/0x1c0 [drm]
> > [   74.556473]  ? __kasan_check_write+0x14/0x20
> > [   74.556481]  ? _raw_spin_lock_irqsave+0x8e/0xf0
> > [   74.556492]  ? _raw_spin_trylock_bh+0x100/0x100
> > [   74.556504]  amdgpu_drm_ioctl+0x7e/0xd0 [amdgpu]
> > [   74.557409]  __x64_sys_ioctl+0xc3/0x100
> > [   74.557418]  do_syscall_64+0x38/0x90
> > [   74.557427]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> >
> > Any idea what's going wrong here?
>
> In the meantime, I posted an updated patchset with a fix in patch 1 [1].
> Maybe you can apply that one and test.
>
> The original bug report and testing are at [2]. Apparently, DRM core has
> to be changed together with the drivers. I'm honestly not sure why.

Same thing Chris pointed out on the i915 patch: if drivers stop
setting drm->pdev but core still uses it, things blow up. So I think
patch 1 needs to land asap.
-Daniel

>
> Best regards
> Thomas
>
> [1]
> https://lore.kernel.org/dri-devel/20210118131420.15874-1-tzimmerm...@suse.de/T/#m8a0cdf02375a4e23e194d2e7eb80e8738632ea84
> [2]
> https://lore.kernel.org/dri-devel/7851c78c-8c57-3c84-cd49-a72703095...@suse.de/
>
> >
> > Thanks in advance,
> > Christian.
> >
> > Am 07.01.21 um 09:07 schrieb Thomas Zimmermann:
> >> Using struct drm_device.pdev is deprecated. Convert amdgpu to struct
> >> drm_device.dev. No functional changes.
> >>
> >> v3:
> >>     * rebased
> >>
> >> Signed-off-by: Thomas Zimmermann <tzimmerm...@suse.de>
> >> Acked-by: Christian König <christian.koe...@amd.com>
> >> Acked-by: Alex Deucher <alexander.deuc...@amd.com>
> >> Acked-by: Sam Ravnborg <s...@ravnborg.org>
> >> Cc: Alex Deucher <alexander.deuc...@amd.com>
> >> Cc: Christian König <christian.koe...@amd.com>
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 17 ++++++++---------
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |  1 +
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  1 -
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c      |  2 +-
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c     | 10 +++++-----
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c     |  2 +-
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c     | 10 +++++-----
> >>   7 files changed, 21 insertions(+), 22 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> index 7d16395ede0a..f7e2a878411e 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> @@ -1423,9 +1423,9 @@ static void amdgpu_switcheroo_set_state(struct
> >> pci_dev *pdev,
> >>           /* don't suspend or resume card normally */
> >>           dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
> >> -        pci_set_power_state(dev->pdev, PCI_D0);
> >> -        amdgpu_device_load_pci_state(dev->pdev);
> >> -        r = pci_enable_device(dev->pdev);
> >> +        pci_set_power_state(pdev, PCI_D0);
> >> +        amdgpu_device_load_pci_state(pdev);
> >> +        r = pci_enable_device(pdev);
> >>           if (r)
> >>               DRM_WARN("pci_enable_device failed (%d)\n", r);
> >>           amdgpu_device_resume(dev, true);
> >> @@ -1437,10 +1437,10 @@ static void amdgpu_switcheroo_set_state(struct
> >> pci_dev *pdev,
> >>           drm_kms_helper_poll_disable(dev);
> >>           dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
> >>           amdgpu_device_suspend(dev, true);
> >> -        amdgpu_device_cache_pci_state(dev->pdev);
> >> +        amdgpu_device_cache_pci_state(pdev);
> >>           /* Shut down the device */
> >> -        pci_disable_device(dev->pdev);
> >> -        pci_set_power_state(dev->pdev, PCI_D3cold);
> >> +        pci_disable_device(pdev);
> >> +        pci_set_power_state(pdev, PCI_D3cold);
> >>           dev->switch_power_state = DRM_SWITCH_POWER_OFF;
> >>       }
> >>   }
> >> @@ -1703,8 +1703,7 @@ static void
> >> amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
> >>       adev->enable_virtual_display = false;
> >>       if (amdgpu_virtual_display) {
> >> -        struct drm_device *ddev = adev_to_drm(adev);
> >> -        const char *pci_address_name = pci_name(ddev->pdev);
> >> +        const char *pci_address_name = pci_name(adev->pdev);
> >>           char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
> >>           pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
> >> @@ -3397,7 +3396,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> >>           }
> >>       }
> >> -    pci_enable_pcie_error_reporting(adev->ddev.pdev);
> >> +    pci_enable_pcie_error_reporting(adev->pdev);
> >>       /* Post card if necessary */
> >>       if (amdgpu_device_need_post(adev)) {
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> >> index f764803c53a4..0150a51b65ef 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> >> @@ -926,6 +926,7 @@ amdgpu_display_user_framebuffer_create(struct
> >> drm_device *dev,
> >>                          struct drm_file *file_priv,
> >>                          const struct drm_mode_fb_cmd2 *mode_cmd)
> >>   {
> >> +    struct amdgpu_device *adev = drm_to_adev(dev);
> >>       struct drm_gem_object *obj;
> >>       struct amdgpu_framebuffer *amdgpu_fb;
> >>       int ret;
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >> index 72efd579ec5e..b4ea67e12ada 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >> @@ -1204,7 +1204,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
> >>       if (ret)
> >>           return ret;
> >> -    ddev->pdev = pdev;
> >>       pci_set_drvdata(pdev, ddev);
> >>       ret = amdgpu_driver_load_kms(adev, ent->driver_data);
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> >> index 0bf7d36c6686..51cd49c6f38f 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> >> @@ -271,7 +271,7 @@ static int amdgpufb_create(struct drm_fb_helper
> >> *helper,
> >>       DRM_INFO("fb depth is %d\n", fb->format->depth);
> >>       DRM_INFO("   pitch is %d\n", fb->pitches[0]);
> >> -    vga_switcheroo_client_fb_set(adev_to_drm(adev)->pdev, info);
> >> +    vga_switcheroo_client_fb_set(adev->pdev, info);
> >>       return 0;
> >>   out:
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >> index d0a1fee1f5f6..a5c42c3004a0 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >> @@ -619,7 +619,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
> >> void *data,
> >>       int r = 0;
> >>       if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {
> >> -        dev_dbg(&dev->pdev->dev,
> >> +        dev_dbg(dev->dev,
> >>               "va_address 0x%LX is in reserved area 0x%LX\n",
> >>               args->va_address, AMDGPU_VA_RESERVED_SIZE);
> >>           return -EINVAL;
> >> @@ -627,7 +627,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
> >> void *data,
> >>       if (args->va_address >= AMDGPU_GMC_HOLE_START &&
> >>           args->va_address < AMDGPU_GMC_HOLE_END) {
> >> -        dev_dbg(&dev->pdev->dev,
> >> +        dev_dbg(dev->dev,
> >>               "va_address 0x%LX is in VA hole 0x%LX-0x%LX\n",
> >>               args->va_address, AMDGPU_GMC_HOLE_START,
> >>               AMDGPU_GMC_HOLE_END);
> >> @@ -639,14 +639,14 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
> >> void *data,
> >>       vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
> >>       vm_size -= AMDGPU_VA_RESERVED_SIZE;
> >>       if (args->va_address + args->map_size > vm_size) {
> >> -        dev_dbg(&dev->pdev->dev,
> >> +        dev_dbg(dev->dev,
> >>               "va_address 0x%llx is in top reserved area 0x%llx\n",
> >>               args->va_address + args->map_size, vm_size);
> >>           return -EINVAL;
> >>       }
> >>       if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) {
> >> -        dev_dbg(&dev->pdev->dev, "invalid flags combination 0x%08X\n",
> >> +        dev_dbg(dev->dev, "invalid flags combination 0x%08X\n",
> >>               args->flags);
> >>           return -EINVAL;
> >>       }
> >> @@ -658,7 +658,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
> >> void *data,
> >>       case AMDGPU_VA_OP_REPLACE:
> >>           break;
> >>       default:
> >> -        dev_dbg(&dev->pdev->dev, "unsupported operation %d\n",
> >> +        dev_dbg(dev->dev, "unsupported operation %d\n",
> >>               args->operation);
> >>           return -EINVAL;
> >>       }
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
> >> index 47cad23a6b9e..bca4dddd5a15 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
> >> @@ -176,7 +176,7 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct
> >> drm_device *dev,
> >>       i2c->rec = *rec;
> >>       i2c->adapter.owner = THIS_MODULE;
> >>       i2c->adapter.class = I2C_CLASS_DDC;
> >> -    i2c->adapter.dev.parent = &dev->pdev->dev;
> >> +    i2c->adapter.dev.parent = dev->dev;
> >>       i2c->dev = dev;
> >>       i2c_set_adapdata(&i2c->adapter, i2c);
> >>       mutex_init(&i2c->mutex);
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> >> index b16b32797624..3c37cf1ae8b7 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> >> @@ -142,7 +142,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device
> >> *adev, unsigned long flags)
> >>           (amdgpu_is_atpx_hybrid() ||
> >>            amdgpu_has_atpx_dgpu_power_cntl()) &&
> >>           ((flags & AMD_IS_APU) == 0) &&
> >> -        !pci_is_thunderbolt_attached(dev->pdev))
> >> +        !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
> >>           flags |= AMD_IS_PX;
> >>       parent = pci_upstream_bridge(adev->pdev);
> >> @@ -156,7 +156,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device
> >> *adev, unsigned long flags)
> >>        */
> >>       r = amdgpu_device_init(adev, flags);
> >>       if (r) {
> >> -        dev_err(&dev->pdev->dev, "Fatal error during GPU init\n");
> >> +        dev_err(dev->dev, "Fatal error during GPU init\n");
> >>           goto out;
> >>       }
> >> @@ -199,7 +199,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device
> >> *adev, unsigned long flags)
> >>       acpi_status = amdgpu_acpi_init(adev);
> >>       if (acpi_status)
> >> -        dev_dbg(&dev->pdev->dev, "Error during ACPI methods call\n");
> >> +        dev_dbg(dev->dev, "Error during ACPI methods call\n");
> >>       if (adev->runpm) {
> >>           /* only need to skip on ATPX */
> >> @@ -735,10 +735,10 @@ int amdgpu_info_ioctl(struct drm_device *dev,
> >> void *data, struct drm_file *filp)
> >>           if (!dev_info)
> >>               return -ENOMEM;
> >> -        dev_info->device_id = dev->pdev->device;
> >> +        dev_info->device_id = adev->pdev->device;
> >>           dev_info->chip_rev = adev->rev_id;
> >>           dev_info->external_rev = adev->external_rev_id;
> >> -        dev_info->pci_rev = dev->pdev->revision;
> >> +        dev_info->pci_rev = adev->pdev->revision;
> >>           dev_info->family = adev->family;
> >>           dev_info->num_shader_engines =
> >> adev->gfx.config.max_shader_engines;
> >>           dev_info->num_shader_arrays_per_engine =
> >> adev->gfx.config.max_sh_per_se;
> >
> > _______________________________________________
> > dri-devel mailing list
> > dri-de...@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
> --
> Thomas Zimmermann
> Graphics Driver Developer
> SUSE Software Solutions Germany GmbH
> Maxfeldstr. 5, 90409 Nürnberg, Germany
> (HRB 36809, AG Nürnberg)
> Geschäftsführer: Felix Imendörffer
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to