See inline.

On 2019-04-23 3:23 p.m., Zeng, Oak wrote:
> Remap HDP_MEM_COHERENCY_FLUSH_CNTL and HDP_REG_COHERENCY_FLUSH_CNTL
> to an empty page in mmio space. We will later map this page to process
> space so application can flush hdp. This can't be done properly at
> those registers' original location because it will expose more than
> desired registers to process space.
>
> v2: Use explicit register hole location
> v3: Moved remapped hdp registers into adev struct
> v4: Use more generic name for remapped page
>      Expose register offset in kfd_ioctl.h
> v5: Move hdp register remap function to nbio ip function
>
> Change-Id: Ia8d27c0c9a082711d16bbf55602bf5712a47b6d6
> Signed-off-by: Oak Zeng <oak.z...@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  7 +++++++
>   drivers/gpu/drm/amd/amdgpu/cik.c       |  1 +
>   drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 15 ++++++++++++---
>   drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 15 ++++++++++++---
>   drivers/gpu/drm/amd/amdgpu/si.c        |  1 +
>   drivers/gpu/drm/amd/amdgpu/soc15.c     | 11 +++++++++++
>   drivers/gpu/drm/amd/amdgpu/vi.c        |  1 +
>   include/uapi/linux/kfd_ioctl.h         |  7 +++++++
>   8 files changed, 52 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index bc96ec4..e16dcee 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -642,6 +642,11 @@ struct nbio_hdp_flush_reg {
>       u32 ref_and_mask_sdma1;
>   };
>   
> +struct amdgpu_mmio_remap {
> +     u32 reg_offset;
> +     resource_size_t bus_addr;
> +};
> +
>   struct amdgpu_nbio_funcs {
>       const struct nbio_hdp_flush_reg *hdp_flush_reg;
>       u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
> @@ -669,6 +674,7 @@ struct amdgpu_nbio_funcs {
>       void (*ih_control)(struct amdgpu_device *adev);
>       void (*init_registers)(struct amdgpu_device *adev);
>       void (*detect_hw_virt)(struct amdgpu_device *adev);
> +     void (*remap_hdp_registers)(struct amdgpu_device *adev);
>   };
>   
>   struct amdgpu_df_funcs {
> @@ -767,6 +773,7 @@ struct amdgpu_device {
>       void __iomem                    *rmmio;
>       /* protects concurrent MM_INDEX/DATA based register access */
>       spinlock_t mmio_idx_lock;
> +     struct amdgpu_mmio_remap        rmmio_remap;
>       /* protects concurrent SMC based register access */
>       spinlock_t smc_idx_lock;
>       amdgpu_rreg_t                   smc_rreg;
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c 
> b/drivers/gpu/drm/amd/amdgpu/cik.c
> index 07c1f23..3f7ec6a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik.c
> @@ -1827,6 +1827,7 @@ static int cik_common_early_init(void *handle)
>   {
>       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
> +     adev->rmmio_remap.bus_addr = ULLONG_MAX;
>       adev->smc_rreg = &cik_smc_rreg;
>       adev->smc_wreg = &cik_smc_wreg;
>       adev->pcie_rreg = &cik_pcie_rreg;
> diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c 
> b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
> index 1cdb98a..83f1f75 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
> @@ -29,9 +29,18 @@
>   #include "nbio/nbio_7_0_sh_mask.h"
>   #include "nbio/nbio_7_0_smn.h"
>   #include "vega10_enum.h"
> +#include <uapi/linux/kfd_ioctl.h>
>   
>   #define smnNBIF_MGCG_CTRL_LCLK      0x1013a05c
>   
> +static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev)
> +{
> +     WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
> +             adev->rmmio_remap.reg_offset << 2 + HDP_MEM_FLUSH_CNTL);

I don't think this does what you intend. I think + binds stronger than 
<<, so you should write this as

     (adev->rmmio_remap.reg_offset << 2) + HDP_MEM_FLUSH_CNTL


> +     WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
> +             adev->rmmio_remap.reg_offset << 2 + HDP_REG_FLUSH_CNTL);

Same as above.


> +}
> +
>   static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
>   {
>           u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
> @@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev,
>                               struct amdgpu_ring *ring)
>   {
>       if (!ring || !ring->funcs->emit_wreg)
> -             WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
> +             WREG32_NO_KIQ(adev->rmmio_remap.reg_offset + 
> HDP_MEM_FLUSH_CNTL, 0);

Are you sure this is correct? As I understand it from the above, 
adev->rmmio_remap.reg_offset is in dwords, HDP_MEM_FLUSH_CNTL is in 
bytes. Something will need to be shifted.


>       else
> -             amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
> -                     NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
> +             amdgpu_ring_emit_wreg(ring, adev->rmmio_remap.reg_offset + 
> HDP_MEM_FLUSH_CNTL, 0);

Same as above.


>   }
>   
>   static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
> @@ -283,4 +291,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
>       .ih_control = nbio_v7_0_ih_control,
>       .init_registers = nbio_v7_0_init_registers,
>       .detect_hw_virt = nbio_v7_0_detect_hw_virt,
> +     .remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c 
> b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> index c69d515..fa67772 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> @@ -27,9 +27,18 @@
>   #include "nbio/nbio_7_4_offset.h"
>   #include "nbio/nbio_7_4_sh_mask.h"
>   #include "nbio/nbio_7_4_0_smn.h"
> +#include <uapi/linux/kfd_ioctl.h>
>   
>   #define smnNBIF_MGCG_CTRL_LCLK      0x1013a21c
>   
> +static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
> +{
> +     WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
> +             adev->rmmio_remap.reg_offset << 2 + HDP_MEM_FLUSH_CNTL);
> +     WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
> +             adev->rmmio_remap.reg_offset << 2 + HDP_REG_FLUSH_CNTL);

Operator precedence. See above.


> +}
> +
>   static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
>   {
>       u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
> @@ -53,10 +62,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev,
>                               struct amdgpu_ring *ring)
>   {
>       if (!ring || !ring->funcs->emit_wreg)
> -             WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
> +             WREG32_NO_KIQ(adev->rmmio_remap.reg_offset + 
> HDP_MEM_FLUSH_CNTL, 0);
Are adev->rmmio_remap.reg_offset and HDP_MEM_FLUSH_CNTL in the same units?
>       else
> -             amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
> -                     NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
> +             amdgpu_ring_emit_wreg(ring, adev->rmmio_remap.reg_offset + 
> HDP_MEM_FLUSH_CNTL, 0);
>   }
>   
>   static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
> @@ -262,4 +270,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
>       .ih_control = nbio_v7_4_ih_control,
>       .init_registers = nbio_v7_4_init_registers,
>       .detect_hw_virt = nbio_v7_4_detect_hw_virt,
> +     .remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
> index 9d8df68..c6b89c2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si.c
> +++ b/drivers/gpu/drm/amd/amdgpu/si.c
> @@ -1405,6 +1405,7 @@ static int si_common_early_init(void *handle)
>   {
>       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
> +     adev->rmmio_remap.bus_addr = ULLONG_MAX;
>       adev->smc_rreg = &si_smc_rreg;
>       adev->smc_wreg = &si_smc_wreg;
>       adev->pcie_rreg = &si_pcie_rreg;
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index bdb5ad9..3685944 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -44,6 +44,7 @@
>   #include "smuio/smuio_9_0_offset.h"
>   #include "smuio/smuio_9_0_sh_mask.h"
>   #include "nbio/nbio_7_0_default.h"
> +#include "nbio/nbio_7_0_offset.h"
>   #include "nbio/nbio_7_0_sh_mask.h"
>   #include "nbio/nbio_7_0_smn.h"
>   #include "mp/mp_9_0_offset.h"
> @@ -64,6 +65,7 @@
>   #include "dce_virtual.h"
>   #include "mxgpu_ai.h"
>   #include "amdgpu_smu.h"
> +#include <uapi/linux/kfd_ioctl.h>
>   
>   #define mmMP0_MISC_CGTT_CTRL0                                               
>                     0x01b9
>   #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX                                      
>                     0
> @@ -777,8 +779,11 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
>   
>   static int soc15_common_early_init(void *handle)
>   {
> +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
>       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
> +     adev->rmmio_remap.reg_offset = (MMIO_REG_HOLE_OFFSET) >> 2;

Why is this shifted? I think this creates some of the confusion above 
where things have different units.


> +     adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
>       adev->smc_rreg = NULL;
>       adev->smc_wreg = NULL;
>       adev->pcie_rreg = &soc15_pcie_rreg;
> @@ -1007,6 +1012,12 @@ static int soc15_common_hw_init(void *handle)
>       soc15_program_aspm(adev);
>       /* setup nbio registers */
>       adev->nbio_funcs->init_registers(adev);
> +     /* remap HDP registers to a hole in mmio space,
> +      * for the purpose of expose those registers
> +      * to process space
> +      */
> +     if (adev->nbio_funcs->remap_hdp_registers)
> +             adev->nbio_funcs->remap_hdp_registers(adev);
>       /* enable the doorbell aperture */
>       soc15_enable_doorbell_aperture(adev, true);
>       /* HW doorbell routing policy: doorbell writing not
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> index 5e5b42a..44565b23 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -1037,6 +1037,7 @@ static int vi_common_early_init(void *handle)
>               adev->smc_rreg = &vi_smc_rreg;
>               adev->smc_wreg = &vi_smc_wreg;
>       }
> +     adev->rmmio_remap.bus_addr = ULLONG_MAX;
>       adev->pcie_rreg = &vi_pcie_rreg;
>       adev->pcie_wreg = &vi_pcie_wreg;
>       adev->uvd_ctx_rreg = &vi_uvd_ctx_rreg;
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index dc067ed..7524e6e 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -426,6 +426,13 @@ struct kfd_ioctl_import_dmabuf_args {
>       __u32 dmabuf_fd;        /* to KFD */
>   };
>   
> +/* Register offset inside the remapped mmio page
> + */
> +enum kfd_mmio_remap {
> +     HDP_MEM_FLUSH_CNTL = 0,

This needs some prefix to disambiguate. Something to make it clear that 
this is not the MMIO register address, but an offset in a remapped 
address range. This is a bit verbose, but it's the best I can come up with:

KFD_MMIO_REMAP_HPD_MEM_FLUSH_CNTL = 0,
KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4,

Regards,
   Felix


> +     HDP_REG_FLUSH_CNTL = 4,
> +};
> +
>   #define AMDKFD_IOCTL_BASE 'K'
>   #define AMDKFD_IO(nr)                       _IO(AMDKFD_IOCTL_BASE, nr)
>   #define AMDKFD_IOR(nr, type)                _IOR(AMDKFD_IOCTL_BASE, nr, 
> type)
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to