On 7 Jul. 2017 19:29, "Christian König" <deathsim...@vodafone.de> wrote:
What tiling format do the destination textures have? Sounds like the offset is just added so that we distribute memory accesses more equally over memory channels. From the traces I think tile index mode was 10. Dave. Regards, Christian. Am 07.07.2017 um 09:18 schrieb Dave Airlie: > From: Dave Airlie <airl...@redhat.com> > > (this patch doesn't seem to work fully, hopefully AMD can tell us > more info on the rules, and how to calculate the magic). > > It appears that to get full access to memory bandwidth with MRT > rendering the pro vulkan driver seems to offset each image by 0x3800. > I'm not sure how that value is calculated. > > Glenn came up with the idea (probably what -pro does also) of just > offsetting every image in round robin order, in the hope that apps > would create mrt images in sequence anyways. > > This attempts to do that using an atomic counter in the device. > > This gets the deferred demo from 800fps->1150fps on my rx480. > > (I've tested dota2 and talos still run at least after this) > --- > src/amd/vulkan/radv_device.c | 7 ++++--- > src/amd/vulkan/radv_image.c | 16 +++++++++++++++- > src/amd/vulkan/radv_private.h | 3 +++ > 3 files changed, 22 insertions(+), 4 deletions(-) > > diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c > index d1c519a..f39526d 100644 > --- a/src/amd/vulkan/radv_device.c > +++ b/src/amd/vulkan/radv_device.c > @@ -2706,7 +2706,7 @@ radv_initialise_color_surface(struct radv_device > *device, > /* Intensity is implemented as Red, so treat it that way. 
*/ > cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] > == VK_SWIZZLE_1); > - va = device->ws->buffer_get_va(iview->bo) + iview->image->offset; > + va = device->ws->buffer_get_va(iview->bo) + iview->image->offset > + iview->image->mrt_offset; > if (device->physical_device->rad_info.chip_class >= GFX9) { > struct gfx9_surf_meta_flags meta; > @@ -2756,11 +2756,11 @@ radv_initialise_color_surface(struct radv_device > *device, > /* CMASK variables */ > va = device->ws->buffer_get_va(iview->bo) + iview->image->offset; > - va += iview->image->cmask.offset; > + va += iview->image->cmask.offset + iview->image->mrt_offset; > cb->cb_color_cmask = va >> 8; > va = device->ws->buffer_get_va(iview->bo) + iview->image->offset; > - va += iview->image->dcc_offset; > + va += iview->image->dcc_offset + iview->image->mrt_offset; > cb->cb_dcc_base = va >> 8; > uint32_t max_slice = radv_surface_layer_count(iview); > @@ -2776,6 +2776,7 @@ radv_initialise_color_surface(struct radv_device > *device, > if (iview->image->fmask.size) { > va = device->ws->buffer_get_va(iview->bo) + > iview->image->offset + iview->image->fmask.offset; > + va += iview->image->mrt_offset; > cb->cb_color_fmask = va >> 8; > } else { > cb->cb_color_fmask = cb->cb_color_base; > diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c > index b3a223b..bc20a53 100644 > --- a/src/amd/vulkan/radv_image.c > +++ b/src/amd/vulkan/radv_image.c > @@ -31,6 +31,7 @@ > #include "sid.h" > #include "gfx9d.h" > #include "util/debug.h" > +#include "util/u_atomic.h" > static unsigned > radv_choose_tiling(struct radv_device *Device, > const struct radv_image_create_info *create_info) > @@ -208,6 +209,7 @@ si_set_mutable_tex_desc_fields(struct radv_device > *device, > } else > va += base_level_info->offset; > + va += image->mrt_offset; > state[0] = va >> 8; > state[1] &= C_008F14_BASE_ADDRESS_HI; > state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40); > @@ -220,6 +222,7 @@ si_set_mutable_tex_desc_fields(struct 
radv_device > *device, > state[7] = 0; > if (image->surface.dcc_size && first_level < > image->surface.num_dcc_levels) { > uint64_t meta_va = gpu_address + image->dcc_offset; > + meta_va += image->mrt_offset; > if (chip_class <= VI) > meta_va += base_level_info->dcc_offset; > state[6] |= S_008F28_COMPRESSION_EN(1); > @@ -436,7 +439,7 @@ si_make_texture_descriptor(struct radv_device *device, > uint64_t gpu_address = device->ws->buffer_get_va(imag > e->bo); > uint64_t va; > - va = gpu_address + image->offset + image->fmask.offset; > + va = gpu_address + image->offset + image->mrt_offset + > image->fmask.offset; > if (device->physical_device->rad_info.chip_class >= GFX9) > { > fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK; > @@ -642,6 +645,7 @@ radv_image_alloc_fmask(struct radv_device *device, > radv_image_get_fmask_info(device, image, image->info.samples, > &image->fmask); > image->fmask.offset = align64(image->size, image->fmask.alignment); > + image->fmask.size += image->mrt_offset; > image->size = image->fmask.offset + image->fmask.size; > image->alignment = MAX2(image->alignment, image->fmask.alignment); > } > @@ -709,6 +713,7 @@ radv_image_alloc_cmask(struct radv_device *device, > radv_image_get_cmask_info(device, image, &image->cmask); > image->cmask.offset = align64(image->size, image->cmask.alignment); > + image->cmask.size += image->mrt_offset; > /* + 8 for storing the clear values */ > if (!image->clear_value_offset) { > image->clear_value_offset = image->cmask.offset + > image->cmask.size; > @@ -724,6 +729,7 @@ radv_image_alloc_dcc(struct radv_device *device, > { > image->dcc_offset = align64(image->size, > image->surface.dcc_alignment); > /* + 16 for storing the clear values + dcc pred */ > + image->surface.dcc_size += image->mrt_offset; > image->clear_value_offset = image->dcc_offset + > image->surface.dcc_size; > image->dcc_pred_offset = image->clear_value_offset + 8; > image->size = image->dcc_offset + image->surface.dcc_size + 16; > @@ -801,6 +807,14 
@@ radv_image_create(VkDevice _device, > image->size = image->surface.surf_size; > image->alignment = image->surface.surf_alignment; > + if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && > !create_info->scanout) { > + uint32_t mrt_idx = > p_atomic_inc_return(&device->image_mrt_offset_counter) > - 1; > + mrt_idx %= 8; > + mrt_idx *= 0x3800; > + image->mrt_offset = mrt_idx; > + image->size += image->mrt_offset; > + } > + > if (image->exclusive || image->queue_family_mask == 1) > can_cmask_dcc = true; > diff --git a/src/amd/vulkan/radv_private.h > b/src/amd/vulkan/radv_private.h > index 5c30d18..f09095a 100644 > --- a/src/amd/vulkan/radv_private.h > +++ b/src/amd/vulkan/radv_private.h > @@ -547,6 +547,8 @@ struct radv_device { > /* Backup in-memory cache to be used if the app doesn't provide > one */ > struct radv_pipeline_cache * mem_cache; > + > + uint32_t image_mrt_offset_counter; > }; > struct radv_device_memory { > @@ -1211,6 +1213,7 @@ struct radv_image { > /* Set when bound */ > struct radeon_winsys_bo *bo; > VkDeviceSize offset; > + VkDeviceSize mrt_offset; > uint32_t dcc_offset; > uint32_t htile_offset; > struct radeon_surf surface; >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev