https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115146

--- Comment #1 from Sergei Trofimovich <slyfox at gcc dot gnu.org> ---
Specifically, if I change the original example to use 16-byte arrays instead of 8-byte ones:

--- bug.c.orig  2024-05-18 11:07:47.426351557 +0100
+++ bug.c       2024-05-18 11:08:02.135601287 +0100
@@ -15,2 +15,2 @@
-    u8 src[8] __attribute__((aligned(16))) = { 0 };
-    u8 dst[8] __attribute__((aligned(16))) = { 0 };
+    u8 src[16] __attribute__((aligned(16))) = { 0 };
+    u8 dst[16] __attribute__((aligned(16))) = { 0 };
@@ -23 +23 @@
-    for (unsigned long i = 0; i < 8; i += 2) {
+    for (unsigned long i = 0; i < 16; i += 2) {

I get the expected code:

Dump of assembler code for function main:
   0x0000000000401030 <+0>:     sub    $0x28,%rsp
   0x0000000000401034 <+4>:     pxor   %xmm0,%xmm0
   0x0000000000401038 <+8>:     mov    %rsp,%rdi
   0x000000000040103b <+11>:    movaps %xmm0,(%rsp)
   0x000000000040103f <+15>:    movaps %xmm0,0x10(%rsp)
   0x0000000000401044 <+20>:    call   0x401170 <fill_src>
   0x0000000000401049 <+25>:    movdqa (%rsp),%xmm0
   0x000000000040104e <+30>:    lea    0x10(%rsp),%rdi
   0x0000000000401053 <+35>:    movdqa %xmm0,%xmm1
   0x0000000000401057 <+39>:    psllw  $0x8,%xmm0
   0x000000000040105c <+44>:    psrlw  $0x8,%xmm1
   0x0000000000401061 <+49>:    por    %xmm0,%xmm1
   0x0000000000401065 <+53>:    movaps %xmm1,0x10(%rsp)
   0x000000000040106a <+58>:    call   0x401180 <assert_dst>
   0x000000000040106f <+63>:    xor    %eax,%eax
   0x0000000000401071 <+65>:    add    $0x28,%rsp
   0x0000000000401075 <+69>:    ret

Reply via email to