I think TCCs are global, because all memory traffic from the gfx engines, CP, and SDMA has to go through the TCCs; for example, memory requests from different SEs that access the same memory address go to the same TCC.
Marek On Tue, Sep 24, 2019 at 10:58 PM Alex Deucher <alexdeuc...@gmail.com> wrote: > On Tue, Sep 24, 2019 at 6:29 PM Marek Olšák <mar...@gmail.com> wrote: > > > > From: Marek Olšák <marek.ol...@amd.com> > > > > UMDs need this for correct programming of harvested chips. > > > > Signed-off-by: Marek Olšák <marek.ol...@amd.com> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- > > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + > > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 ++ > > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 11 +++++++++++ > > include/uapi/drm/amdgpu_drm.h | 2 ++ > > 5 files changed, 18 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > index f82d634cf3f9..b70b30378c20 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > @@ -75,23 +75,24 @@ > > * - 3.25.0 - Add support for sensor query info (stable pstate > sclk/mclk). > > * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. > > * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. > > * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES > > * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID > > * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE. > > * - 3.31.0 - Add support for per-flip tiling attribute changes with DC > > * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS. > > * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS. 
> > * - 3.34.0 - Non-DC can flip correctly between buffers with different > pitches > > + * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask > > */ > > #define KMS_DRIVER_MAJOR 3 > > -#define KMS_DRIVER_MINOR 34 > > +#define KMS_DRIVER_MINOR 35 > > #define KMS_DRIVER_PATCHLEVEL 0 > > > > #define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256 > > > > int amdgpu_vram_limit = 0; > > int amdgpu_vis_vram_limit = 0; > > int amdgpu_gart_size = -1; /* auto */ > > int amdgpu_gtt_size = -1; /* auto */ > > int amdgpu_moverate = -1; /* auto */ > > int amdgpu_benchmarking = 0; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > index 59c5464c96be..88dccff41dff 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > @@ -158,20 +158,21 @@ struct amdgpu_gfx_config { > > struct amdgpu_rb_config > rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE]; > > > > /* gfx configure feature */ > > uint32_t double_offchip_lds_buf; > > /* cached value of DB_DEBUG2 */ > > uint32_t db_debug2; > > /* gfx10 specific config */ > > uint32_t num_sc_per_sh; > > uint32_t num_packer_per_sc; > > uint32_t pa_sc_tile_steering_override; > > + uint64_t tcc_disabled_mask; > > }; > > > > struct amdgpu_cu_info { > > uint32_t simd_per_cu; > > uint32_t max_waves_per_simd; > > uint32_t wave_front_size; > > uint32_t max_scratch_slots_per_cu; > > uint32_t lds_size; > > > > /* total active CU number */ > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > > index 91f5aaf99861..7356efe7e2d3 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > > @@ -775,20 +775,22 @@ static int amdgpu_info_ioctl(struct drm_device > *dev, void *data, struct drm_file > > dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh; > > dev_info.num_tcc_blocks = > adev->gfx.config.max_texture_channel_caches; > > 
dev_info.gs_vgt_table_depth = > adev->gfx.config.gs_vgt_table_depth; > > dev_info.gs_prim_buffer_depth = > adev->gfx.config.gs_prim_buffer_depth; > > dev_info.max_gs_waves_per_vgt = > adev->gfx.config.max_gs_threads; > > > > if (adev->family >= AMDGPU_FAMILY_NV) > > dev_info.pa_sc_tile_steering_override = > > > adev->gfx.config.pa_sc_tile_steering_override; > > > > + dev_info.tcc_disabled_mask = > adev->gfx.config.tcc_disabled_mask; > > + > > return copy_to_user(out, &dev_info, > > min((size_t)size, sizeof(dev_info))) > ? -EFAULT : 0; > > } > > case AMDGPU_INFO_VCE_CLOCK_TABLE: { > > unsigned i; > > struct drm_amdgpu_info_vce_clock_table vce_clk_table = > {}; > > struct amd_vce_state *vce_state; > > > > for (i = 0; i < AMDGPU_VCE_CLOCK_TABLE_ENTRIES; i++) { > > vce_state = amdgpu_dpm_get_vce_clock_state(adev, > i); > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > index cfc0952f6175..ca01643fa0c8 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > @@ -1684,31 +1684,42 @@ static void gfx_v10_0_tcp_harvest(struct > amdgpu_device *adev) > > tmp |= (gcrd_targets_disable_tcp & > gcrd_targets_disable_mask); > > WREG32_SOC15(GC, 0, > mmGCRD_SA_TARGETS_DISABLE, tmp); > > } > > } > > > > gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, > 0xffffffff); > > mutex_unlock(&adev->grbm_idx_mutex); > > } > > } > > > > +static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) > > +{ > > + uint32_t tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) | > > + RREG32_SOC15(GC, 0, > mmCGTS_USER_TCC_DISABLE); > > + > > + adev->gfx.config.tcc_disabled_mask = > > + REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, > TCC_DISABLE) | > > + (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, > HI_TCC_DISABLE) << 16); > > +} > > Are TCCs per SE/SH? If so, you'll need to walk each instance and > create a mask from each instance like we do for setup_rb and cu_info. 
> > Alex > > > + > > static void gfx_v10_0_constants_init(struct amdgpu_device *adev) > > { > > u32 tmp; > > int i; > > > > WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); > > > > gfx_v10_0_tiling_mode_table_init(adev); > > > > gfx_v10_0_setup_rb(adev); > > gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info); > > + gfx_v10_0_get_tcc_info(adev); > > adev->gfx.config.pa_sc_tile_steering_override = > > gfx_v10_0_init_pa_sc_tile_steering_override(adev); > > > > /* XXX SH_MEM regs */ > > /* where to put LDS, scratch, GPUVM in FSA64 space */ > > mutex_lock(&adev->srbm_mutex); > > for (i = 0; i < > adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { > > nv_grbm_select(adev, 0, 0, 0, i); > > /* CP and shaders */ > > WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, > DEFAULT_SH_MEM_CONFIG); > > diff --git a/include/uapi/drm/amdgpu_drm.h > b/include/uapi/drm/amdgpu_drm.h > > index f3ad429173e3..a69e31929155 100644 > > --- a/include/uapi/drm/amdgpu_drm.h > > +++ b/include/uapi/drm/amdgpu_drm.h > > @@ -1001,20 +1001,22 @@ struct drm_amdgpu_info_device { > > __u32 max_gs_waves_per_vgt; > > __u32 _pad1; > > /* always on cu bitmap */ > > __u32 cu_ao_bitmap[4][4]; > > /** Starting high virtual address for UMDs. */ > > __u64 high_va_offset; > > /** The maximum high virtual address */ > > __u64 high_va_max; > > /* gfx10 pa_sc_tile_steering_override */ > > __u32 pa_sc_tile_steering_override; > > + /* disabled TCCs */ > > + __u64 tcc_disabled_mask; > > }; > > > > struct drm_amdgpu_info_hw_ip { > > /** Version of h/w IP */ > > __u32 hw_ip_version_major; > > __u32 hw_ip_version_minor; > > /** Capabilities */ > > __u64 capabilities_flags; > > /** command buffer address start alignment*/ > > __u32 ib_start_alignment; > > -- > > 2.17.1 > > > > _______________________________________________ > > amd-gfx mailing list > > amd-gfx@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/amd-gfx >
_______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx