https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112510

--- Comment #6 from Vladimir Sadovnikov <sadko4u at gmail dot com> ---
Here is the disassembly of the function from `test.cpp`, compiled with GCC 7.5.0:

```
0000000000400727 <_Z13gate_x1_curvePfPKfPKN3dsp11gate_knee_tEm>:
  400727:       4c 8d 54 24 08          lea    0x8(%rsp),%r10
  40072c:       48 83 e4 c0             and    $0xffffffffffffffc0,%rsp
  400730:       41 ff 72 f8             push   -0x8(%r10)
  400734:       55                      push   %rbp
  400735:       48 89 e5                mov    %rsp,%rbp
  400738:       41 57                   push   %r15
  40073a:       41 56                   push   %r14
  40073c:       41 55                   push   %r13
  40073e:       41 54                   push   %r12
  400740:       41 52                   push   %r10
  400742:       53                      push   %rbx
  400743:       48 81 ec 80 03 00 00    sub    $0x380,%rsp
  40074a:       49 89 fc                mov    %rdi,%r12
  40074d:       49 89 f5                mov    %rsi,%r13
  400750:       49 89 d7                mov    %rdx,%r15
  400753:       49 89 ce                mov    %rcx,%r14
  400756:       48 8d 9d 50 fc ff ff    lea    -0x3b0(%rbp),%rbx
  40075d:       83 3d 1c 19 00 00 00    cmpl   $0x0,0x191c(%rip)        # 402080 <__asan_option_detect_stack_use_after_return@@Base>
  400764:       0f 85 32 01 00 00       jne    40089c <_Z13gate_x1_curvePfPKfPKN3dsp11gate_knee_tEm+0x175>
  40076a:       48 c7 03 b3 8a b5 41    movq   $0x41b58ab3,(%rbx)
  400771:       48 c7 43 08 f8 0c 40    movq   $0x400cf8,0x8(%rbx)
  400778:       00 
  400779:       48 c7 43 10 27 07 40    movq   $0x400727,0x10(%rbx)
  400780:       00 
  400781:       48 89 df                mov    %rbx,%rdi
  400784:       48 c1 ef 03             shr    $0x3,%rdi
  400788:       c7 87 00 80 ff 7f f1    movl   $0xf1f1f1f1,0x7fff8000(%rdi)
  40078f:       f1 f1 f1 
  400792:       c7 87 04 80 ff 7f f1    movl   $0xf1f1f1f1,0x7fff8004(%rdi)
  400799:       f1 f1 f1 
  40079c:       c7 87 0c 80 ff 7f f2    movl   $0xf2f2f2f2,0x7fff800c(%rdi)
  4007a3:       f2 f2 f2 
  4007a6:       c7 87 20 80 ff 7f f2    movl   $0xf2f2f2f2,0x7fff8020(%rdi)
  4007ad:       f2 f2 f2 
  4007b0:       c7 87 24 80 ff 7f f2    movl   $0xf2f2f2f2,0x7fff8024(%rdi)
  4007b7:       f2 f2 f2 
  4007ba:       c7 87 68 80 ff 7f f3    movl   $0xf3f3f3f3,0x7fff8068(%rdi)
  4007c1:       f3 f3 f3 
  4007c4:       c7 87 6c 80 ff 7f f3    movl   $0xf3f3f3f3,0x7fff806c(%rdi)
  4007cb:       f3 f3 f3 
  4007ce:       62 d2 7d 48 18 07       vbroadcastss (%r15),%zmm0
  4007d4:       62 d2 7d 48 18 4f 01    vbroadcastss 0x4(%r15),%zmm1
  4007db:       62 d2 7d 48 18 57 02    vbroadcastss 0x8(%r15),%zmm2
  4007e2:       62 d2 7d 48 18 5f 03    vbroadcastss 0xc(%r15),%zmm3
  4007e9:       62 d2 7d 48 18 67 04    vbroadcastss 0x10(%r15),%zmm4
  4007f0:       62 d2 7d 48 18 6f 05    vbroadcastss 0x14(%r15),%zmm5
  4007f7:       62 d2 7d 48 18 77 06    vbroadcastss 0x18(%r15),%zmm6
  4007fe:       62 d2 7d 48 18 7f 07    vbroadcastss 0x1c(%r15),%zmm7
  400805:       62 f1 7c 48 29 43 05    vmovaps %zmm0,0x140(%rbx)
  40080c:       62 f1 7c 48 29 4b 06    vmovaps %zmm1,0x180(%rbx)
  400813:       62 f1 7c 48 29 53 07    vmovaps %zmm2,0x1c0(%rbx)
  40081a:       62 f1 7c 48 29 5b 08    vmovaps %zmm3,0x200(%rbx)
  400821:       62 f1 7c 48 29 63 09    vmovaps %zmm4,0x240(%rbx)
  400828:       62 f1 7c 48 29 6b 0a    vmovaps %zmm5,0x280(%rbx)
  40082f:       62 f1 7c 48 29 73 0b    vmovaps %zmm6,0x2c0(%rbx)
  400836:       62 f1 7c 48 29 7b 0c    vmovaps %zmm7,0x300(%rbx)
```

gcc --version
gcc (SUSE Linux) 7.5.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

As we can see, there is no load of %rbx from the stack, and all offsets are
multiples of 0x40.

Reply via email to