https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61296
--- Comment #8 from H.J. Lu <hjl.tools at gmail dot com> --- (In reply to Jakub Jelinek from comment #7) > See discussions when I've added DATA_ABI_ALIGNMENT. DATA_ABI_ALIGNMENT was added for PR 56564: /* Similar to DATA_ALIGNMENT, but for the cases where the ABI mandates some alignment increase, instead of optimization only purposes. E.g. AMD x86-64 psABI says that variables with array type larger than 15 bytes must be aligned to 16 byte boundaries. If this macro is not defined, then ALIGN is used. */ #define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \ ix86_data_alignment ((TYPE), (ALIGN), false) and ix86_data_alignment was changed by https://gcc.gnu.org/ml/gcc-patches/2014-01/msg01086.html There is a discussion about always aligning to DATA_ABI_ALIGNMENT: https://gcc.gnu.org/ml/gcc-patches/2014-05/msg01435.html My question was "Should we limit DATA_ALIGNMENT to MAX (ABI alignment, natural alignment)?" GCC will only use the excessive alignment with locally defined data, which has no psABI implications: [hjl@gnu-6 pr61296]$ cat z.c struct foo { char i[128]; }; struct foo x = { 1 }; struct foo y = { 1 }; struct foo z = { 1 }; void bar () { int i; for (i = 0; i < sizeof (x.i); i++) x.i[i] = y.i[i] + z.i[i]; } [hjl@gnu-6 pr61296]$ /export/build/gnu/gcc/build-x86_64-linux/gcc/xgcc -B/export/build/gnu/gcc/build-x86_64-linux/gcc/ -O3 -mavx2 -S z.c -fPIC [hjl@gnu-6 pr61296]$ cat z.s .file "z.c" .section .text.unlikely,"ax",@progbits .LCOLDB0: .text .LHOTB0: .p2align 4,,15 .globl bar .type bar, @function bar: .LFB0: .cfi_startproc leaq 8(%rsp), %r10 .cfi_def_cfa 10, 0 movq y@GOTPCREL(%rip), %rdi andq $-32, %rsp pushq -8(%r10) pushq %rbp .cfi_escape 0x10,0x6,0x2,0x76,0 movq %rsp, %rbp pushq %r12 pushq %r10 .cfi_escape 0xf,0x3,0x76,0x70,0x6 .cfi_escape 0x10,0xc,0x2,0x76,0x78 movq %rdi, %r10 pushq %rbx .cfi_escape 0x10,0x3,0x2,0x76,0x68 negq %r10 andl $31, %r10d je .L7 movq x@GOTPCREL(%rip), %r8 movq z@GOTPCREL(%rip), %r9 movl %r10d, %esi xorl %eax, %eax xorl %edx, %edx movl $128, 
%r11d .p2align 4,,10 .p2align 3 .L3: movzbl (%rdi,%rax), %ecx addl $1, %edx addb (%r9,%rax), %cl movb %cl, (%r8,%rax) movl %r11d, %ecx addq $1, %rax subl %edx, %ecx cmpl %r10d, %edx jne .L3 movl %edx, %eax movl %ecx, %ebx movl $96, %r11d movl $3, %r12d .L2: leaq (%r9,%rax), %rdx leaq (%rdi,%rax), %r10 addq %r8, %rax cmpl $4, %r12d vmovdqu (%rdx), %xmm0 vinserti128 $0x1, 16(%rdx), %ymm0, %ymm0 vpaddb (%r10), %ymm0, %ymm0 vmovups %xmm0, (%rax) vextracti128 $0x1, %ymm0, 16(%rax) vmovdqu 32(%rdx), %xmm0 vinserti128 $0x1, 48(%rdx), %ymm0, %ymm0 vpaddb 32(%r10), %ymm0, %ymm0 vmovups %xmm0, 32(%rax) vextracti128 $0x1, %ymm0, 48(%rax) vmovdqu 64(%rdx), %xmm0 vinserti128 $0x1, 80(%rdx), %ymm0, %ymm0 vpaddb 64(%r10), %ymm0, %ymm0 vmovups %xmm0, 64(%rax) vextracti128 $0x1, %ymm0, 80(%rax) jne .L4 vmovdqu 96(%rdx), %xmm0 vinserti128 $0x1, 112(%rdx), %ymm0, %ymm0 vpaddb 96(%r10), %ymm0, %ymm0 vmovups %xmm0, 96(%rax) vextracti128 $0x1, %ymm0, 112(%rax) .L4: leal (%r11,%rsi), %eax subl %r11d, %ecx cmpl %r11d, %ebx leal (%rax,%rcx), %esi je .L10 .p2align 4,,10 .p2align 3 .L5: movslq %eax, %rdx addl $1, %eax movzbl (%r9,%rdx), %ecx addb (%rdi,%rdx), %cl cmpl %esi, %eax movb %cl, (%r8,%rdx) jne .L5 .L10: vzeroupper popq %rbx popq %r10 .cfi_remember_state .cfi_def_cfa 10, 0 popq %r12 popq %rbp leaq -8(%r10), %rsp .cfi_def_cfa 7, 8 ret .p2align 4,,10 .p2align 3 .L7: .cfi_restore_state movl $128, %r11d movl $4, %r12d movl $128, %ebx xorl %eax, %eax movl $128, %ecx xorl %esi, %esi movq x@GOTPCREL(%rip), %r8 movq z@GOTPCREL(%rip), %r9 jmp .L2 .cfi_endproc .LFE0: .size bar, .-bar .section .text.unlikely .LCOLDE0: .text .LHOTE0: .globl z .data .align 64 .type z, @object .size z, 128 z: .byte 1 .zero 127 .globl y .align 64 .type y, @object .size y, 128 y: .byte 1 .zero 127 .globl x .align 64 .type x, @object .size x, 128 x: .byte 1 .zero 127 .ident "GCC: (GNU) 5.0.0 20141205 (experimental)" .section .note.GNU-stack,"",@progbits Do you have a testcase to show decreasing DATA_ALIGNMENT would 
break backwards compatibility with older GCC versions? Our data show that the excessive alignment (the 64-byte `.align 64` emitted for x, y and z above) doesn't improve performance.