Re: [PATCH v5 05/11] drm/amdgpu: Use RMW accessors for changing LNKCTL
On Fri, 21 Jul 2023, Alex Deucher wrote: > On Fri, Jul 21, 2023 at 4:18 AM Ilpo Järvinen > wrote: > > > > On Thu, 20 Jul 2023, Bjorn Helgaas wrote: > > > > > On Mon, Jul 17, 2023 at 03:04:57PM +0300, Ilpo Järvinen wrote: > > > > Don't assume that only the driver would be accessing LNKCTL. ASPM > > > > policy changes can trigger write to LNKCTL outside of driver's control. > > > > And in the case of upstream bridge, the driver does not even own the > > > > device it's changing the registers for. > > > > > > > > Use RMW capability accessors which do proper locking to avoid losing > > > > concurrent updates to the register value. > > > > > > > > Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK parts") > > > > Fixes: 62a37553414a ("drm/amdgpu: add si implementation v10") > > > > Suggested-by: Lukas Wunner > > > > Signed-off-by: Ilpo Järvinen > > > > Cc: sta...@vger.kernel.org > > > > > > Do we have any reports of problems that are fixed by this patch (or by > > > others in the series)? If not, I'm not sure it really fits the usual > > > stable kernel criteria: > > > > > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/stable-kernel-rules.rst?id=v6.4 > > > > I was on the edge with this. The answer to your direct question is no, > > there are no such reports so it would be okay to leave stable out I think. > > This applies to all patches in this series. > > > > Basically, this series came to be after Lukas noted the potential > > concurrency issues with how LNKCTL is unprotected when reviewing > > (internally) my bandwidth controller series. Then I went to look around > > all LNKCTL usage and realized existing things might already have similar > > issues. > > > > Do you want me to send another version w/o cc stable or you'll take care > > of that? 
> > > > > > --- > > > > drivers/gpu/drm/amd/amdgpu/cik.c | 36 +--- > > > > drivers/gpu/drm/amd/amdgpu/si.c | 36 +--- > > > > 2 files changed, 20 insertions(+), 52 deletions(-) > > > > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c > > > > b/drivers/gpu/drm/amd/amdgpu/cik.c > > > > index 5641cf05d856..e63abdf52b6c 100644 > > > > --- a/drivers/gpu/drm/amd/amdgpu/cik.c > > > > +++ b/drivers/gpu/drm/amd/amdgpu/cik.c > > > > @@ -1574,17 +1574,8 @@ static void cik_pcie_gen3_enable(struct > > > > amdgpu_device *adev) > > > > u16 bridge_cfg2, gpu_cfg2; > > > > u32 max_lw, current_lw, tmp; > > > > > > > > - pcie_capability_read_word(root, PCI_EXP_LNKCTL, > > > > - &bridge_cfg); > > > > - pcie_capability_read_word(adev->pdev, > > > > PCI_EXP_LNKCTL, > > > > - &gpu_cfg); > > > > - > > > > - tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD; > > > > - pcie_capability_write_word(root, PCI_EXP_LNKCTL, > > > > tmp16); > > > > - > > > > - tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD; > > > > - pcie_capability_write_word(adev->pdev, > > > > PCI_EXP_LNKCTL, > > > > - tmp16); > > > > + pcie_capability_set_word(root, PCI_EXP_LNKCTL, > > > > PCI_EXP_LNKCTL_HAWD); > > > > + pcie_capability_set_word(adev->pdev, > > > > PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD); > > > > > > > > tmp = RREG32_PCIE(ixPCIE_LC_STATUS1); > > > > max_lw = (tmp & > > > > PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> > > > > @@ -1637,21 +1628,14 @@ static void cik_pcie_gen3_enable(struct > > > > amdgpu_device *adev) > > > > msleep(100); > > > > > > > > /* linkctl */ > > > > - pcie_capability_read_word(root, > > > > PCI_EXP_LNKCTL, > > > > - &tmp16); > > > > - tmp16 &= ~PCI_EXP_LNKCTL_HAWD; > > > > - tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD); > > > > - pcie_capability_write_word(root, > > > > PCI_EXP_LNKCTL, > > > > - tmp16); > > > > - > > > > - pcie_capability_read_word(adev->pdev, > > > > - PCI_EXP_LNKCTL, > > > > - &tmp16); > > > > - tmp16 &= ~PCI_EXP_LNKCTL_HAWD; > > > > - tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD); > > > 
> - pcie_capability_write_word(adev->pdev, > > > > - PCI_EXP_LNKCTL, > > > > - tmp16); > > > > + pcie_capability_clear_and_set_word(root,
Re: [PATCH v5 05/11] drm/amdgpu: Use RMW accessors for changing LNKCTL
On Fri, Jul 21, 2023 at 4:18 AM Ilpo Järvinen wrote: > > On Thu, 20 Jul 2023, Bjorn Helgaas wrote: > > > On Mon, Jul 17, 2023 at 03:04:57PM +0300, Ilpo Järvinen wrote: > > > Don't assume that only the driver would be accessing LNKCTL. ASPM > > > policy changes can trigger write to LNKCTL outside of driver's control. > > > And in the case of upstream bridge, the driver does not even own the > > > device it's changing the registers for. > > > > > > Use RMW capability accessors which do proper locking to avoid losing > > > concurrent updates to the register value. > > > > > > Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK parts") > > > Fixes: 62a37553414a ("drm/amdgpu: add si implementation v10") > > > Suggested-by: Lukas Wunner > > > Signed-off-by: Ilpo Järvinen > > > Cc: sta...@vger.kernel.org > > > > Do we have any reports of problems that are fixed by this patch (or by > > others in the series)? If not, I'm not sure it really fits the usual > > stable kernel criteria: > > > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/stable-kernel-rules.rst?id=v6.4 > > I was on the edge with this. The answer to your direct question is no, > there are no such reports so it would be okay to leave stable out I think. > This applies to all patches in this series. > > Basically, this series came to be after Lukas noted the potential > concurrency issues with how LNKCTL is unprotected when reviewing > (internally) my bandwidth controller series. Then I went to look around > all LNKCTL usage and realized existing things might already have similar > issues. > > Do you want me to send another version w/o cc stable or you'll take care > of that? 
> > > > --- > > > drivers/gpu/drm/amd/amdgpu/cik.c | 36 +--- > > > drivers/gpu/drm/amd/amdgpu/si.c | 36 +--- > > > 2 files changed, 20 insertions(+), 52 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c > > > b/drivers/gpu/drm/amd/amdgpu/cik.c > > > index 5641cf05d856..e63abdf52b6c 100644 > > > --- a/drivers/gpu/drm/amd/amdgpu/cik.c > > > +++ b/drivers/gpu/drm/amd/amdgpu/cik.c > > > @@ -1574,17 +1574,8 @@ static void cik_pcie_gen3_enable(struct > > > amdgpu_device *adev) > > > u16 bridge_cfg2, gpu_cfg2; > > > u32 max_lw, current_lw, tmp; > > > > > > - pcie_capability_read_word(root, PCI_EXP_LNKCTL, > > > - &bridge_cfg); > > > - pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL, > > > - &gpu_cfg); > > > - > > > - tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD; > > > - pcie_capability_write_word(root, PCI_EXP_LNKCTL, > > > tmp16); > > > - > > > - tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD; > > > - pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL, > > > - tmp16); > > > + pcie_capability_set_word(root, PCI_EXP_LNKCTL, > > > PCI_EXP_LNKCTL_HAWD); > > > + pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, > > > PCI_EXP_LNKCTL_HAWD); > > > > > > tmp = RREG32_PCIE(ixPCIE_LC_STATUS1); > > > max_lw = (tmp & > > > PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> > > > @@ -1637,21 +1628,14 @@ static void cik_pcie_gen3_enable(struct > > > amdgpu_device *adev) > > > msleep(100); > > > > > > /* linkctl */ > > > - pcie_capability_read_word(root, > > > PCI_EXP_LNKCTL, > > > - &tmp16); > > > - tmp16 &= ~PCI_EXP_LNKCTL_HAWD; > > > - tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD); > > > - pcie_capability_write_word(root, > > > PCI_EXP_LNKCTL, > > > - tmp16); > > > - > > > - pcie_capability_read_word(adev->pdev, > > > - PCI_EXP_LNKCTL, > > > - &tmp16); > > > - tmp16 &= ~PCI_EXP_LNKCTL_HAWD; > > > - tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD); > > > - pcie_capability_write_word(adev->pdev, > > > - PCI_EXP_LNKCTL, > > > - tmp16); > > > + 
pcie_capability_clear_and_set_word(root, > > > PCI_EXP_LNKCTL, > > > + > > > PCI_EXP_LNKCTL_HAWD, > > > + bridge_cfg > > > & > > > +
Re: [PATCH v5 05/11] drm/amdgpu: Use RMW accessors for changing LNKCTL
On Thu, 20 Jul 2023, Bjorn Helgaas wrote: > On Mon, Jul 17, 2023 at 03:04:57PM +0300, Ilpo Järvinen wrote: > > Don't assume that only the driver would be accessing LNKCTL. ASPM > > policy changes can trigger write to LNKCTL outside of driver's control. > > And in the case of upstream bridge, the driver does not even own the > > device it's changing the registers for. > > > > Use RMW capability accessors which do proper locking to avoid losing > > concurrent updates to the register value. > > > > Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK parts") > > Fixes: 62a37553414a ("drm/amdgpu: add si implementation v10") > > Suggested-by: Lukas Wunner > > Signed-off-by: Ilpo Järvinen > > Cc: sta...@vger.kernel.org > > Do we have any reports of problems that are fixed by this patch (or by > others in the series)? If not, I'm not sure it really fits the usual > stable kernel criteria: > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/stable-kernel-rules.rst?id=v6.4 I was on the edge with this. The answer to your direct question is no, there are no such reports so it would be okay to leave stable out I think. This applies to all patches in this series. Basically, this series came to be after Lukas noted the potential concurrency issues with how LNKCTL is unprotected when reviewing (internally) my bandwidth controller series. Then I went to look around all LNKCTL usage and realized existing things might already have similar issues. Do you want me to send another version w/o cc stable or will you take care of that? 
> > --- > > drivers/gpu/drm/amd/amdgpu/cik.c | 36 +--- > > drivers/gpu/drm/amd/amdgpu/si.c | 36 +--- > > 2 files changed, 20 insertions(+), 52 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c > > b/drivers/gpu/drm/amd/amdgpu/cik.c > > index 5641cf05d856..e63abdf52b6c 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/cik.c > > +++ b/drivers/gpu/drm/amd/amdgpu/cik.c > > @@ -1574,17 +1574,8 @@ static void cik_pcie_gen3_enable(struct > > amdgpu_device *adev) > > u16 bridge_cfg2, gpu_cfg2; > > u32 max_lw, current_lw, tmp; > > > > - pcie_capability_read_word(root, PCI_EXP_LNKCTL, > > - &bridge_cfg); > > - pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL, > > - &gpu_cfg); > > - > > - tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD; > > - pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16); > > - > > - tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD; > > - pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL, > > - tmp16); > > + pcie_capability_set_word(root, PCI_EXP_LNKCTL, > > PCI_EXP_LNKCTL_HAWD); > > + pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, > > PCI_EXP_LNKCTL_HAWD); > > > > tmp = RREG32_PCIE(ixPCIE_LC_STATUS1); > > max_lw = (tmp & > > PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> > > @@ -1637,21 +1628,14 @@ static void cik_pcie_gen3_enable(struct > > amdgpu_device *adev) > > msleep(100); > > > > /* linkctl */ > > - pcie_capability_read_word(root, PCI_EXP_LNKCTL, > > - &tmp16); > > - tmp16 &= ~PCI_EXP_LNKCTL_HAWD; > > - tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD); > > - pcie_capability_write_word(root, PCI_EXP_LNKCTL, > > - tmp16); > > - > > - pcie_capability_read_word(adev->pdev, > > - PCI_EXP_LNKCTL, > > - &tmp16); > > - tmp16 &= ~PCI_EXP_LNKCTL_HAWD; > > - tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD); > > - pcie_capability_write_word(adev->pdev, > > - PCI_EXP_LNKCTL, > > - tmp16); > > + pcie_capability_clear_and_set_word(root, > > PCI_EXP_LNKCTL, > > + > > PCI_EXP_LNKCTL_HAWD, > > + bridge_cfg & > > + > > PCI_EXP_LNKCTL_HAWD); > > + 
pcie_capability_clear_and_set_word(adev->pdev, > > PCI_EXP_LNKCTL, > > + > > PCI_EXP_LNKCTL_HAWD, > > +
Re: [PATCH v5 05/11] drm/amdgpu: Use RMW accessors for changing LNKCTL
On Mon, Jul 17, 2023 at 03:04:57PM +0300, Ilpo Järvinen wrote: > Don't assume that only the driver would be accessing LNKCTL. ASPM > policy changes can trigger write to LNKCTL outside of driver's control. > And in the case of upstream bridge, the driver does not even own the > device it's changing the registers for. > > Use RMW capability accessors which do proper locking to avoid losing > concurrent updates to the register value. > > Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK parts") > Fixes: 62a37553414a ("drm/amdgpu: add si implementation v10") > Suggested-by: Lukas Wunner > Signed-off-by: Ilpo Järvinen > Cc: sta...@vger.kernel.org Do we have any reports of problems that are fixed by this patch (or by others in the series)? If not, I'm not sure it really fits the usual stable kernel criteria: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/stable-kernel-rules.rst?id=v6.4 > --- > drivers/gpu/drm/amd/amdgpu/cik.c | 36 +--- > drivers/gpu/drm/amd/amdgpu/si.c | 36 +--- > 2 files changed, 20 insertions(+), 52 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c > b/drivers/gpu/drm/amd/amdgpu/cik.c > index 5641cf05d856..e63abdf52b6c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/cik.c > +++ b/drivers/gpu/drm/amd/amdgpu/cik.c > @@ -1574,17 +1574,8 @@ static void cik_pcie_gen3_enable(struct amdgpu_device > *adev) > u16 bridge_cfg2, gpu_cfg2; > u32 max_lw, current_lw, tmp; > > - pcie_capability_read_word(root, PCI_EXP_LNKCTL, > - &bridge_cfg); > - pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL, > - &gpu_cfg); > - > - tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD; > - pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16); > - > - tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD; > - pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL, > -tmp16); > + pcie_capability_set_word(root, PCI_EXP_LNKCTL, > PCI_EXP_LNKCTL_HAWD); > + pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, > PCI_EXP_LNKCTL_HAWD); > > tmp = 
RREG32_PCIE(ixPCIE_LC_STATUS1); > max_lw = (tmp & > PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> > @@ -1637,21 +1628,14 @@ static void cik_pcie_gen3_enable(struct amdgpu_device > *adev) > msleep(100); > > /* linkctl */ > - pcie_capability_read_word(root, PCI_EXP_LNKCTL, > - &tmp16); > - tmp16 &= ~PCI_EXP_LNKCTL_HAWD; > - tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD); > - pcie_capability_write_word(root, PCI_EXP_LNKCTL, > -tmp16); > - > - pcie_capability_read_word(adev->pdev, > - PCI_EXP_LNKCTL, > - &tmp16); > - tmp16 &= ~PCI_EXP_LNKCTL_HAWD; > - tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD); > - pcie_capability_write_word(adev->pdev, > -PCI_EXP_LNKCTL, > -tmp16); > + pcie_capability_clear_and_set_word(root, > PCI_EXP_LNKCTL, > + > PCI_EXP_LNKCTL_HAWD, > +bridge_cfg & > + > PCI_EXP_LNKCTL_HAWD); > + pcie_capability_clear_and_set_word(adev->pdev, > PCI_EXP_LNKCTL, > + > PCI_EXP_LNKCTL_HAWD, > +gpu_cfg & > + > PCI_EXP_LNKCTL_HAWD); Wow, there's a lot of pointless-looking work going on here: set root PCI_EXP_LNKCTL_HAWD set GPU PCI_EXP_LNKCTL_HAWD for (i = 0; i < 10; i++) { read root PCI_EXP_LNKCTL read GPU PCI_EXP_LNKCTL clear root PCI_EXP_LNKCTL_HAWD if (root PCI_EXP_LNKCTL_HAWD was set) set root PCI_EXP_LNKCTL_HAWD clear GPU PCI_EXP_LNKCTL_HAWD if (GPU PCI_EXP_LNKCTL_HAWD was set) set GPU PCI_EXP_LNKCTL_HAWD } If it really *is* pointless, it would be nice to clean it up, but that wouldn't be material for this
[PATCH v5 05/11] drm/amdgpu: Use RMW accessors for changing LNKCTL
Don't assume that only the driver would be accessing LNKCTL. ASPM policy changes can trigger write to LNKCTL outside of driver's control. And in the case of upstream bridge, the driver does not even own the device it's changing the registers for. Use RMW capability accessors which do proper locking to avoid losing concurrent updates to the register value. Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK parts") Fixes: 62a37553414a ("drm/amdgpu: add si implementation v10") Suggested-by: Lukas Wunner Signed-off-by: Ilpo Järvinen Cc: sta...@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/cik.c | 36 +--- drivers/gpu/drm/amd/amdgpu/si.c | 36 +--- 2 files changed, 20 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 5641cf05d856..e63abdf52b6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1574,17 +1574,8 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) u16 bridge_cfg2, gpu_cfg2; u32 max_lw, current_lw, tmp; - pcie_capability_read_word(root, PCI_EXP_LNKCTL, - &bridge_cfg); - pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL, - &gpu_cfg); - - tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD; - pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16); - - tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD; - pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL, - tmp16); + pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD); + pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD); tmp = RREG32_PCIE(ixPCIE_LC_STATUS1); max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> @@ -1637,21 +1628,14 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) msleep(100); /* linkctl */ - pcie_capability_read_word(root, PCI_EXP_LNKCTL, - &tmp16); - tmp16 &= ~PCI_EXP_LNKCTL_HAWD; - tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD); - pcie_capability_write_word(root, PCI_EXP_LNKCTL, - tmp16); - - 
pcie_capability_read_word(adev->pdev, - PCI_EXP_LNKCTL, - &tmp16); - tmp16 &= ~PCI_EXP_LNKCTL_HAWD; - tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD); - pcie_capability_write_word(adev->pdev, - PCI_EXP_LNKCTL, - tmp16); + pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_HAWD, + bridge_cfg & + PCI_EXP_LNKCTL_HAWD); + pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_HAWD, + gpu_cfg & + PCI_EXP_LNKCTL_HAWD); /* linkctl2 */ pcie_capability_read_word(root, PCI_EXP_LNKCTL2, diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index f64b87b11b1b..4b81f29e5fd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -2276,17 +2276,8 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) u16 bridge_cfg2, gpu_cfg2; u32 max_lw, current_lw, tmp; - pcie_capability_read_word(root, PCI_EXP_LNKCTL, - &bridge_cfg); - pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL, - &gpu_cfg); - - tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD; - pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16); - - tmp16 = gpu_cfg | PCI