r-b
On Tue, Jul 30, 2019 at 6:29 PM Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > > It can be enabled with RADV_PERFTEST=cswave32. > > Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > --- > src/amd/vulkan/radv_debug.h | 1 + > src/amd/vulkan/radv_device.c | 12 +++++++++++- > src/amd/vulkan/radv_nir_to_llvm.c | 14 +++++++++++++- > src/amd/vulkan/radv_pipeline.c | 3 ++- > src/amd/vulkan/radv_private.h | 3 +++ > src/amd/vulkan/radv_shader.c | 25 ++++++++++++++++++++++--- > src/amd/vulkan/radv_shader.h | 1 + > 7 files changed, 53 insertions(+), 6 deletions(-) > > diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h > index 723fabda57f..6414e882676 100644 > --- a/src/amd/vulkan/radv_debug.h > +++ b/src/amd/vulkan/radv_debug.h > @@ -64,6 +64,7 @@ enum { > RADV_PERFTEST_BO_LIST = 0x20, > RADV_PERFTEST_SHADER_BALLOT = 0x40, > RADV_PERFTEST_TC_COMPAT_CMASK = 0x80, > + RADV_PERFTEST_CS_WAVE_32 = 0x100, > }; > > bool > diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c > index 65e3ccf91ad..29be192443a 100644 > --- a/src/amd/vulkan/radv_device.c > +++ b/src/amd/vulkan/radv_device.c > @@ -383,6 +383,14 @@ radv_physical_device_init(struct radv_physical_device > *device, > > device->use_shader_ballot = device->instance->perftest_flags & > RADV_PERFTEST_SHADER_BALLOT; > > + /* Determine the number of threads per wave for all stages. */ > + device->cs_wave_size = 64; > + > + if (device->rad_info.chip_class >= GFX10) { > + if (device->instance->perftest_flags & > RADV_PERFTEST_CS_WAVE_32) > + device->cs_wave_size = 32; > + } > + > radv_physical_device_init_mem_types(device); > radv_fill_device_extension_table(device, > &device->supported_extensions); > > @@ -494,6 +502,7 @@ static const struct debug_control radv_perftest_options[] > = { > {"bolist", RADV_PERFTEST_BO_LIST}, > {"shader_ballot", RADV_PERFTEST_SHADER_BALLOT}, > {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK}, > + {"cswave32", RADV_PERFTEST_CS_WAVE_32}, > {NULL, 0} > }; > > @@ -1930,7 +1939,8 @@ VkResult radv_CreateDevice( > device->scratch_waves = MAX2(32 * > physical_device->rad_info.num_good_compute_units, > max_threads_per_block / 64); > > - device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1); > + device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) | > + > S_00B800_CS_W32_EN(device->physical_device->cs_wave_size == 32); > > if (device->physical_device->rad_info.chip_class >= GFX7) { > /* If the KMD allows it (there is a KMD hw register for it), > diff --git a/src/amd/vulkan/radv_nir_to_llvm.c > b/src/amd/vulkan/radv_nir_to_llvm.c > index 020c6d17771..feaab8f6370 100644 > --- a/src/amd/vulkan/radv_nir_to_llvm.c > +++ b/src/amd/vulkan/radv_nir_to_llvm.c > @@ -4317,6 +4317,15 @@ static void declare_esgs_ring(struct > radv_shader_context *ctx) > LLVMSetAlignment(ctx->esgs_ring, 64 * 1024); > } > > +static uint8_t > +radv_nir_shader_wave_size(struct nir_shader *const *shaders, int > shader_count, > + const struct radv_nir_compiler_options *options) > +{ > + if (shaders[0]->info.stage == MESA_SHADER_COMPUTE) > + return options->cs_wave_size; > + return 64; > +} > + > static > LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, > struct nir_shader *const *shaders, > @@ -4333,8 +4342,11 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct > ac_llvm_compiler *ac_llvm, > options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH : > AC_FLOAT_MODE_DEFAULT; > > + uint8_t wave_size = radv_nir_shader_wave_size(shaders, > + shader_count, options); > + > ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class, > - options->family, float_mode, 64); > + options->family, float_mode, wave_size); > ctx.context = ctx.ac.context; > > radv_nir_shader_info_init(&shader_info->info); > diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c > index 583b600dfdd..6b8b7bbe25a 100644 > --- a/src/amd/vulkan/radv_pipeline.c > +++ b/src/amd/vulkan/radv_pipeline.c > @@ -4648,7 +4648,8 @@ radv_compute_generate_pm4(struct radv_pipeline > *pipeline) > threads_per_threadgroup = compute_shader->info.cs.block_size[0] * > compute_shader->info.cs.block_size[1] * > compute_shader->info.cs.block_size[2]; > - waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup, 64); > + waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup, > + > device->physical_device->cs_wave_size); > > if (device->physical_device->rad_info.chip_class >= GFX10 && > waves_per_threadgroup == 1) > diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h > index 466f0288399..559cb3b336d 100644 > --- a/src/amd/vulkan/radv_private.h > +++ b/src/amd/vulkan/radv_private.h > @@ -334,6 +334,9 @@ struct radv_physical_device { > /* Whether DISABLE_CONSTANT_ENCODE_REG is supported. */ > bool has_dcc_constant_encode; > > + /* Number of threads per wave. */ > + uint8_t cs_wave_size; > + > /* This is the drivers on-disk cache used as a fallback as opposed to > * the pipeline cache defined by apps. > */ > diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c > index 0c3e375ee5e..0d2e5ae836a 100644 > --- a/src/amd/vulkan/radv_shader.c > +++ b/src/amd/vulkan/radv_shader.c > @@ -623,6 +623,16 @@ radv_get_shader_binary_size(size_t code_size) > return code_size + DEBUGGER_NUM_MARKERS * 4; > } > > +static uint8_t > +radv_get_shader_wave_size(const struct radv_physical_device *pdevice, > + gl_shader_stage stage) > +{ > + if (stage == MESA_SHADER_COMPUTE) > + return pdevice->cs_wave_size; > + > + return 64; > +} > + > static void radv_postprocess_config(const struct radv_physical_device > *pdevice, > const struct ac_shader_config *config_in, > const struct radv_shader_variant_info > *info, > @@ -630,6 +640,7 @@ static void radv_postprocess_config(const struct > radv_physical_device *pdevice, > struct ac_shader_config *config_out) > { > bool scratch_enabled = config_in->scratch_bytes_per_wave > 0; > + uint8_t wave_size = radv_get_shader_wave_size(pdevice, stage); > unsigned vgpr_comp_cnt = 0; > unsigned num_input_vgprs = info->num_input_vgprs; > > @@ -699,7 +710,8 @@ static void radv_postprocess_config(const struct > radv_physical_device *pdevice, > S_00B12C_SO_BASE3_EN(!!info->info.so.strides[3]) | > S_00B12C_SO_EN(!!info->info.so.num_outputs); > > - config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / 4) | > + config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / > + (wave_size == 32 ? 8 : 4)) | > S_00B848_DX10_CLAMP(1) | > S_00B848_FLOAT_MODE(config_out->float_mode); > > @@ -965,10 +977,15 @@ radv_shader_variant_create(struct radv_device *device, > if (binary->variant_info.is_ngg) > sym->size -= 32; > } > + > + uint8_t wave_size = > + radv_get_shader_wave_size(device->physical_device, > + binary->stage); > + > struct ac_rtld_open_info open_info = { > .info = &device->physical_device->rad_info, > .shader_type = binary->stage, > - .wave_size = 64, > + .wave_size = wave_size, > .num_parts = 1, > .elf_ptrs = &elf_data, > .elf_sizes = &elf_size, > @@ -1080,6 +1097,7 @@ shader_variant_compile(struct radv_device *device, > options->check_ir = device->instance->debug_flags & > RADV_DEBUG_CHECKIR; > options->tess_offchip_block_dw_size = > device->tess_offchip_block_dw_size; > options->address32_hi = > device->physical_device->rad_info.address32_hi; > + options->cs_wave_size = device->physical_device->cs_wave_size; > > if (options->supports_spill) > tm_options |= AC_TM_SUPPORTS_SPILL; > @@ -1229,6 +1247,7 @@ generate_shader_stats(struct radv_device *device, > { > enum chip_class chip_class = > device->physical_device->rad_info.chip_class; > unsigned lds_increment = chip_class >= GFX7 ? 512 : 256; > + uint8_t wave_size = > radv_get_shader_wave_size(device->physical_device, stage); > struct ac_shader_config *conf; > unsigned max_simd_waves; > unsigned lds_per_wave = 0; > @@ -1245,7 +1264,7 @@ generate_shader_stats(struct radv_device *device, > unsigned max_workgroup_size = > radv_nir_get_max_workgroup_size(chip_class, stage, > variant->nir); > lds_per_wave = (conf->lds_size * lds_increment) / > - DIV_ROUND_UP(max_workgroup_size, 64); > + DIV_ROUND_UP(max_workgroup_size, wave_size); > } > > if (conf->num_sgprs) > diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h > index fea0d1c8df1..966949fae4f 100644 > --- a/src/amd/vulkan/radv_shader.h > +++ b/src/amd/vulkan/radv_shader.h > @@ -139,6 +139,7 @@ struct radv_nir_compiler_options { > enum chip_class chip_class; > uint32_t tess_offchip_block_dw_size; > uint32_t address32_hi; > + uint8_t cs_wave_size; > }; > > enum radv_ud_index { > -- > 2.22.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev