On Thu, Nov 23, 2017 at 9:18 AM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > On 22.11.2017 22:33, Marek Olšák wrote: >> >> On Wed, Nov 22, 2017 at 7:46 PM, Nicolai Hähnle <nhaeh...@gmail.com >> <mailto:nhaeh...@gmail.com>> wrote: >> > On 21.11.2017 18:30, Marek Olšák wrote: >> >> >> >> From: Marek Olšák <marek.ol...@amd.com <mailto:marek.ol...@amd.com>> >> >> >> >> >> The next commit will reduce the size even more. >> >> --- >> >> src/amd/common/ac_surface.c | 2 +- >> >> src/amd/common/ac_surface.h | 2 +- >> >> src/amd/vulkan/radv_image.c | 8 ++++---- >> >> src/gallium/drivers/r600/evergreen_state.c | 8 ++++---- >> >> src/gallium/drivers/r600/r600_state.c | 8 ++++---- >> >> src/gallium/drivers/r600/r600_texture.c | 14 >> +++++++------- >> >> src/gallium/drivers/r600/radeon_uvd.c | 2 +- >> >> src/gallium/drivers/radeon/r600_texture.c | 14 >> +++++++------- >> >> src/gallium/drivers/radeon/radeon_uvd.c | 2 +- >> >> src/gallium/drivers/radeonsi/cik_sdma.c | 4 ++-- >> >> src/gallium/drivers/radeonsi/si_dma.c | 8 ++++---- >> >> src/gallium/winsys/radeon/drm/radeon_drm_surface.c | 4 ++-- >> >> 12 files changed, 38 insertions(+), 38 deletions(-) >> >> >> >> diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c >> >> index f7600a3..2b6c3fb 100644 >> >> --- a/src/amd/common/ac_surface.c >> >> +++ b/src/amd/common/ac_surface.c >> >> @@ -297,21 +297,21 @@ static int gfx6_compute_level(ADDR_HANDLE >> addrlib, >> >> ret = AddrComputeSurfaceInfo(addrlib, >> >> AddrSurfInfoIn, >> >> AddrSurfInfoOut); >> >> if (ret != ADDR_OK) { >> >> return ret; >> >> } >> >> surf_level = is_stencil ? &surf->u.legacy.stencil_level[level] >> : >> >> &surf->u.legacy.level[level]; >> >> surf_level->offset = align64(surf->surf_size, >> >> AddrSurfInfoOut->baseAlign); >> >> - surf_level->slice_size = AddrSurfInfoOut->sliceSize; >> >> + surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4; >> >> surf_level->nblk_x = AddrSurfInfoOut->pitch; >> >> surf_level->nblk_y = AddrSurfInfoOut->height; >> >> switch (AddrSurfInfoOut->tileMode) { >> >> case ADDR_TM_LINEAR_ALIGNED: >> >> surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED; >> >> break; >> >> case ADDR_TM_1D_TILED_THIN1: >> >> surf_level->mode = RADEON_SURF_MODE_1D; >> >> break; >> >> diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h >> >> index 1dc95cd..ec89f6b 100644 >> >> --- a/src/amd/common/ac_surface.h >> >> +++ b/src/amd/common/ac_surface.h >> >> @@ -64,21 +64,21 @@ enum radeon_micro_mode { >> >> /* bits 19 and 20 are reserved for libdrm_radeon, don't use them */ >> >> #define RADEON_SURF_FMASK (1 << 21) >> >> #define RADEON_SURF_DISABLE_DCC (1 << 22) >> >> #define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23) >> >> #define RADEON_SURF_IMPORTED (1 << 24) >> >> #define RADEON_SURF_OPTIMIZE_FOR_SPACE (1 << 25) >> >> #define RADEON_SURF_SHAREABLE (1 << 26) >> >> struct legacy_surf_level { >> >> uint64_t offset; >> >> - uint64_t slice_size; >> >> + uint32_t slice_size_dw; /* in dwords; max = >> 4GB / >> >> 4. */ >> >> uint32_t dcc_offset; /* relative offset >> within >> >> DCC mip tree */ >> >> uint32_t dcc_fast_clear_size; >> >> uint16_t nblk_x; >> >> uint16_t nblk_y; >> >> enum radeon_surf_mode mode; >> >> }; >> >> struct legacy_surf_layout { >> >> unsigned bankw:4; /* max 8 */ >> >> unsigned bankh:4; /* max 8 */ >> >> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c >> >> index b532aa9..fb7bbde 100644 >> >> --- a/src/amd/vulkan/radv_image.c >> >> +++ b/src/amd/vulkan/radv_image.c >> >> @@ -1149,25 +1149,25 @@ void radv_GetImageSubresourceLayout( >> >> if (device->physical_device->rad_info.chip_class >= GFX9) { >> >> pLayout->offset = surface->u.gfx9.offset[level] + >> >> surface->u.gfx9.surf_slice_size * layer; >> >> pLayout->rowPitch = surface->u.gfx9.surf_pitch * >> >> surface->bpe; >> >> pLayout->arrayPitch = surface->u.gfx9.surf_slice_size; >> >> pLayout->depthPitch = surface->u.gfx9.surf_slice_size; >> >> pLayout->size = surface->u.gfx9.surf_slice_size; >> >> if (image->type == VK_IMAGE_TYPE_3D) >> >> pLayout->size *= u_minify(image->info.depth, >> >> level); >> >> } else { >> >> - pLayout->offset = >> surface->u.legacy.level[level].offset + >> >> surface->u.legacy.level[level].slice_size * layer; >> >> + pLayout->offset = >> surface->u.legacy.level[level].offset + >> >> surface->u.legacy.level[level].slice_size_dw * 4 * layer; >> > >> > >> > I believe the maximum slice size in bytes is (with an RGBA32 texture) >> > >> > 16384 * 16384 * 16 = 2^14 * 2^14 * 2^4 = 2^32 >> > >> > The problem with this code is that the multiplication is now performed >> as >> > uint32_t and can therefore wrap-around. So an explicit cast to 64-bits >> is >> > required. >> > >> > In practice, I guess this rather becomes an issue with smaller slice >> sizes >> > but larger layer indices. We really need test case to exercise >= 4 GB >> > textures... >> >> This should do it: >> >> diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h >> index f18548f..fa17b34 100644 >> --- a/src/amd/common/ac_surface.h >> +++ b/src/amd/common/ac_surface.h >> @@ -71,7 +71,8 @@ enum radeon_micro_mode { >> >> struct legacy_surf_level { >> uint64_t offset; >> - uint32_t slice_size_dw; /* in dwords; max = 4GB / >> 4. */ >> + /* Declare 32 bits of uint64_t, so that multiplication results in 64 >> bits. */ >> + uint64_t slice_size_dw:32; /* in dwords; max = 4GB >> / 4. */ >> uint32_t dcc_offset; /* relative offset within >> DCC mip tree */ >> uint32_t dcc_fast_clear_size; >> unsigned nblk_x:15; > > > Congratulations, you found a compiler bug! :) > > I like this idea very much; however: > > nha@capella:~/tmp$ cat foo.c > #include <stdint.h> > > struct s { > uint64_t foo:32; > }; > > uint64_t foo(uint32_t x, struct s* p) > { > return x * p->foo; > } > > uint64_t foo2(uint32_t x, struct s* p) > { > return x * (uint64_t)p->foo; > } > nha@capella:~/tmp$ gcc -Wall -Wextra -O2 -S foo.c > nha@capella:~/tmp$ cat foo.s > .file "foo.c" > .text > .p2align 4,,15 > .globl foo > .type foo, @function > foo: > .LFB0: > .cfi_startproc > movl %edi, %eax > imull (%rsi), %eax > ret > .cfi_endproc > .LFE0: > .size foo, .-foo > .p2align 4,,15 > .globl foo2 > .type foo2, @function > foo2: > .LFB1: > .cfi_startproc > movl (%rsi), %eax > movl %edi, %edi > imulq %rdi, %rax > ret > .cfi_endproc > .LFE1: > .size foo2, .-foo2 > .ident "GCC: (Ubuntu 6.3.0-12ubuntu2) 6.3.0 20170406" > .section .note.GNU-stack,"",@progbits > > This is with gcc-7.2. > > Some more observations: > > - Clang 5.0 produces the same code > - gcc produces entirely ridiculous code when the bitfield has 33 or more > bits (it ends up producing what is essentially a 33-bit wide multiplication) > - Clang actually produces the expected code when the bitfield has 33 or more > bits > - gcc produces the expected code when compiling it as C++ > > Could you please file bugs against both gcc and clang for this? > > I mean, it's entirely possible that the C standard is crazy enough that it > says this behavior is technically correct -- though I doubt it given the > behavior for 33 bits and the inconsistency between the compilers for C++. In > any case, *if* treating the value as less than 64 bits is correct, they > should really produce a warning since the resulting code is dangerously > different from what you'd expect.
I guess common sense doesn't always work with compilers and/or standards. I don't plan to file compiler bugs because I don't really know what the C standard says in this case. Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev