https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83479

            Bug ID: 83479
           Summary: Register spilling in AVX code
           Product: gcc
           Version: 7.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: bugzi...@poradnik-webmastera.com
  Target Milestone: ---

Here is a snippet of code which performs some calculations on a matrix. It
repeatedly transforms an (N * N) matrix into an (N-1 * N-1) one, and returns a
final scalar value. gcc for some reason is not able to detect that the
intermediate values are not needed anymore, and starts spilling. The code below
is from gcc 7.2; the trunk version also generates similar code. The code was
compiled with "-O3 -march=haswell".
BTW, clang 5 properly handles this and does not spill.

[code]
#include "immintrin.h"

/*
 * Reproducer for GCC PR 83479 ("Register spilling in AVX code").
 *
 * Performs successive elimination passes on rows held in ymm registers:
 * each pass picks a pivot row (vLastRow) and updates every remaining
 * row v_i as
 *     v_i = (v_i - vLastRow * broadcast(vLastRow[i])) * sqrt(vLastRow)
 *                                                     * sqrt(broadcast(vLastRow[i]))
 * then the freshly updated last surviving row becomes the pivot of the
 * next pass.  Returns lane 0 of the final remaining row.
 *
 * NOTE(review): `data` is declared [9][8], so the last valid row is
 * data[8]; the pivot load below reads &data[9][0], one full row past
 * the end of the array -- an out-of-bounds read.  Presumably
 * data[8][0] was intended (the "// 8" pass label suggests row index
 * 8) -- TODO confirm.
 *
 * NOTE(review): __m256d holds only 4 doubles, yet lanes 4..7 of
 * vLastRow are subscripted in the early passes (vLastRow[4] ..
 * vLastRow[7], and lanes >= 4 again in the "// 7"/"// 6"/"// 5"
 * sections).  With GCC's vector-extension subscripting that is an
 * out-of-range access; the posted asm appears to read stack slots
 * around rbp for those lanes.  The reproducer still demonstrates the
 * spilling, but the computation itself is not well-defined -- TODO
 * confirm intent (each logical row may have been meant to span two
 * __m256d halves).
 */
double test(const double data[9][8])
{
  __m256d vLastRow, vLastCol, vSqrtRow, vSqrtCol;

  /* First 4 columns of the first 8 rows, kept live across all passes. */
  __m256d v1 = _mm256_load_pd (&data[0][0]);
  __m256d v2 = _mm256_load_pd (&data[1][0]);
  __m256d v3 = _mm256_load_pd (&data[2][0]);
  __m256d v4 = _mm256_load_pd (&data[3][0]);
  __m256d v5 = _mm256_load_pd (&data[4][0]);
  __m256d v6 = _mm256_load_pd (&data[5][0]);
  __m256d v7 = _mm256_load_pd (&data[6][0]);
  __m256d v8 = _mm256_load_pd (&data[7][0]);

  // 8
  /* NOTE(review): out-of-bounds -- data[9] is past the last row of a
     [9][8] array; see header comment. */
  vLastRow = _mm256_load_pd (&data[9][0]);
  vSqrtRow = _mm256_sqrt_pd(vLastRow);

  /* One update per remaining row: broadcast pivot lane i, take its
     sqrt, and fold both into row v_{i+1}. */
  vLastCol = _mm256_set1_pd(vLastRow[0]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[1]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[2]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v3 = (v3 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[3]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v4 = (v4 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  /* NOTE(review): lanes 4..7 below are out of range for a 4-lane
     __m256d; see header comment. */
  vLastCol = _mm256_set1_pd(vLastRow[4]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v5 = (v5 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[5]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v6 = (v6 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[6]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v7 = (v7 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[7]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v8 = (v8 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // 7
  /* From here on, the pivot of each pass is the row updated last in
     the previous pass; after this pass v8 is dead, after the next v7
     is dead, etc. -- the report is that GCC keeps spilling values that
     are no longer needed. */
  vLastRow = v8;
  vSqrtRow = _mm256_sqrt_pd(vLastRow);

  vLastCol = _mm256_set1_pd(vLastRow[0]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[1]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[2]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v3 = (v3 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[3]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v4 = (v4 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  /* NOTE(review): lanes 4..6 out of range (see header comment). */
  vLastCol = _mm256_set1_pd(vLastRow[4]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v5 = (v5 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[5]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v6 = (v6 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[6]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v7 = (v7 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // 6
  vLastRow = v7;
  vSqrtRow = _mm256_sqrt_pd(vLastRow);

  vLastCol = _mm256_set1_pd(vLastRow[0]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[1]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[2]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v3 = (v3 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[3]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v4 = (v4 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  /* NOTE(review): lanes 4..5 out of range (see header comment). */
  vLastCol = _mm256_set1_pd(vLastRow[4]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v5 = (v5 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[5]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v6 = (v6 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // 5
  vLastRow = v6;
  vSqrtRow = _mm256_sqrt_pd(vLastRow);

  vLastCol = _mm256_set1_pd(vLastRow[0]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[1]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[2]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v3 = (v3 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[3]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v4 = (v4 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  /* NOTE(review): lane 4 out of range (see header comment). */
  vLastCol = _mm256_set1_pd(vLastRow[4]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v5 = (v5 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // 4
  /* From this pass on, only lanes 0..3 are used -- in range. */
  vLastRow = v5;
  vSqrtRow = _mm256_sqrt_pd(vLastRow);

  vLastCol = _mm256_set1_pd(vLastRow[0]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[1]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[2]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v3 = (v3 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[3]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v4 = (v4 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // 3
  vLastRow = v4;
  vSqrtRow = _mm256_sqrt_pd(vLastRow);

  vLastCol = _mm256_set1_pd(vLastRow[0]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[1]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[2]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v3 = (v3 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // 2
  vLastRow = v3;
  vSqrtRow = _mm256_sqrt_pd(vLastRow);

  vLastCol = _mm256_set1_pd(vLastRow[0]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[1]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // 1
  vLastRow = v2;
  vSqrtRow = _mm256_sqrt_pd(vLastRow);

  vLastCol = _mm256_set1_pd(vLastRow[0]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  /* Lane 0 of the last remaining row is the scalar result. */
  return v1[0];
}
[/code]

[out]
test(double const (*) [8]):
  lea r10, [rsp+8]
  and rsp, -32
  push QWORD PTR [r10-8]
  push rbp
  mov rbp, rsp
  push r10
  sub rsp, 1040
  vmovapd ymm7, YMMWORD PTR [rdi+576]
  vbroadcastsd ymm0, QWORD PTR [rbp-16]
  vpermpd ymm2, ymm7, 0
  vsqrtpd ymm15, ymm7
  vpermpd ymm12, ymm7, 255
  vsqrtpd ymm5, ymm2
  vsqrtpd ymm4, ymm12
  vmovapd YMMWORD PTR [rbp-560], ymm2
  vsqrtpd ymm2, ymm0
  vmovapd YMMWORD PTR [rbp-592], ymm5
  vpermpd ymm5, ymm7, 85
  vmovapd YMMWORD PTR [rbp-528], ymm5
  vsqrtpd ymm6, ymm5
  vbroadcastsd ymm5, QWORD PTR [rbp+8]
  vmovapd YMMWORD PTR [rbp-208], ymm4
  vbroadcastsd ymm4, QWORD PTR [rbp+0]
  vmovapd ymm14, ymm5
  vsqrtpd ymm9, ymm5
  vfnmadd213pd ymm14, ymm7, YMMWORD PTR [rdi+448]
  vsqrtpd ymm8, ymm4
  vmovapd YMMWORD PTR [rbp-624], ymm6
  vpermpd ymm6, ymm7, 170
  vmovapd YMMWORD PTR [rbp-496], ymm6
  vsqrtpd ymm1, ymm6
  vmulpd ymm6, ymm14, ymm15
  vmovapd YMMWORD PTR [rbp-656], ymm1
  vbroadcastsd ymm1, QWORD PTR [rbp-8]
  vsqrtpd ymm3, ymm1
  vmulpd ymm6, ymm6, ymm9
  vpermpd ymm13, ymm6, 0
  vsqrtpd ymm14, ymm6
  vsqrtpd ymm10, ymm13
  vmovapd YMMWORD PTR [rbp-464], ymm13
  vpermpd ymm13, ymm6, 170
  vmovapd YMMWORD PTR [rbp-688], ymm10
  vpermpd ymm10, ymm6, 85
  vsqrtpd ymm11, ymm10
  vmovapd YMMWORD PTR [rbp-432], ymm10
  vmovapd YMMWORD PTR [rbp-720], ymm11
  vsqrtpd ymm11, ymm13
  vmulpd ymm13, ymm6, ymm13
  vmovapd YMMWORD PTR [rbp-752], ymm11
  vpermpd ymm11, ymm6, 255
  vsqrtpd ymm9, ymm11
  vmovapd YMMWORD PTR [rbp-144], ymm11
  vmovapd YMMWORD PTR [rbp-784], ymm9
  vmulpd ymm9, ymm6, ymm4
  vfnmadd213pd ymm4, ymm7, YMMWORD PTR [rdi+384]
  vmulpd ymm4, ymm4, ymm15
  vfmsub132pd ymm4, ymm9, ymm8
  vmulpd ymm5, ymm4, ymm14
  vmulpd ymm5, ymm5, ymm8
  vpermpd ymm4, ymm5, 0
  vsqrtpd ymm11, ymm5
  vpermpd ymm10, ymm5, 255
  vsqrtpd ymm8, ymm4
  vmovapd YMMWORD PTR [rbp-400], ymm4
  vmovapd YMMWORD PTR [rbp-816], ymm8
  vpermpd ymm8, ymm5, 85
  vsqrtpd ymm9, ymm8
  vmovapd YMMWORD PTR [rbp-368], ymm8
  vsqrtpd ymm8, ymm10
  vmulpd ymm10, ymm10, ymm5
  vmovapd YMMWORD PTR [rbp-848], ymm9
  vpermpd ymm9, ymm5, 170
  vsqrtpd ymm4, ymm9
  vmovapd YMMWORD PTR [rbp-176], ymm9
  vfnmadd213pd ymm12, ymm7, YMMWORD PTR [rdi+192]
  vmovapd YMMWORD PTR [rbp-912], ymm8
  vmulpd ymm8, ymm1, ymm5
  vmovapd YMMWORD PTR [rbp-80], ymm14
  vmulpd ymm9, ymm6, ymm0
  vmovapd YMMWORD PTR [rbp-880], ymm4
  vmulpd ymm4, ymm6, ymm1
  vfnmadd213pd ymm1, ymm7, YMMWORD PTR [rdi+320]
  vmulpd ymm1, ymm1, ymm15
  vfmsub231pd ymm4, ymm1, ymm3
  vmulpd ymm4, ymm4, ymm14
  vmovapd ymm14, ymm11
  vmovapd YMMWORD PTR [rbp-112], ymm14
  vfmsub132pd ymm4, ymm8, ymm3
  vmulpd ymm4, ymm4, ymm11
  vmulpd ymm4, ymm4, ymm3
  vpermpd ymm1, ymm4, 0
  vpermpd ymm11, ymm4, 170
  vpermpd ymm8, ymm4, 255
  vsqrtpd ymm3, ymm1
  vmovapd YMMWORD PTR [rbp-336], ymm1
  vmovapd YMMWORD PTR [rbp-944], ymm3
  vpermpd ymm3, ymm4, 85
  vsqrtpd ymm1, ymm3
  vmovapd YMMWORD PTR [rbp-304], ymm3
  vmulpd ymm3, ymm0, ymm4
  vmovapd YMMWORD PTR [rbp-976], ymm1
  vsqrtpd ymm1, ymm11
  vmulpd ymm11, ymm11, ymm4
  vmovapd YMMWORD PTR [rbp-1008], ymm1
  vsqrtpd ymm1, ymm8
  vmulpd ymm8, ymm8, ymm4
  vmovapd YMMWORD PTR [rbp-1040], ymm1
  vmulpd ymm1, ymm0, ymm5
  vfnmadd213pd ymm0, ymm7, YMMWORD PTR [rdi+256]
  vmulpd ymm0, ymm0, ymm15
  vfmsub231pd ymm9, ymm0, ymm2
  vmulpd ymm9, ymm9, YMMWORD PTR [rbp-80]
  vmulpd ymm0, ymm12, ymm15
  vmovapd ymm12, YMMWORD PTR [rbp-1040]
  vfmsub231pd ymm1, ymm9, ymm2
  vmulpd ymm1, ymm1, ymm14
  vsqrtpd ymm14, ymm4
  vfmsub132pd ymm1, ymm3, ymm2
  vmulpd ymm1, ymm1, ymm14
  vmulpd ymm1, ymm1, ymm2
  vpermpd ymm2, ymm1, 0
  vpermpd ymm9, ymm1, 85
  vsqrtpd ymm3, ymm2
  vmovapd YMMWORD PTR [rbp-272], ymm2
  vsqrtpd ymm2, ymm9
  vmovapd YMMWORD PTR [rbp-240], ymm9
  vpermpd ymm9, ymm1, 170
  vmovapd YMMWORD PTR [rbp-1072], ymm3
  vsqrtpd ymm3, ymm9
  vmovapd YMMWORD PTR [rbp-1104], ymm2
  vmulpd ymm9, ymm9, ymm1
  vmovapd YMMWORD PTR [rbp-1136], ymm3
  vpermpd ymm3, ymm1, 255
  vsqrtpd ymm2, ymm3
  vmulpd ymm3, ymm3, ymm1
  vmovapd YMMWORD PTR [rbp-1168], ymm2
  vmulpd ymm2, ymm6, YMMWORD PTR [rbp-144]
  vfmsub132pd ymm0, ymm2, YMMWORD PTR [rbp-208]
  vmovapd YMMWORD PTR [rbp-144], ymm14
  vmulpd ymm0, ymm0, YMMWORD PTR [rbp-80]
  vfmsub132pd ymm0, ymm10, YMMWORD PTR [rbp-784]
  vmulpd ymm0, ymm0, YMMWORD PTR [rbp-112]
  vfmsub132pd ymm0, ymm8, YMMWORD PTR [rbp-912]
  vmulpd ymm0, ymm0, ymm14
  vsqrtpd ymm14, ymm1
  vfmsub132pd ymm12, ymm3, ymm0
  vmulpd ymm2, ymm12, ymm14
  vmulpd ymm2, ymm2, YMMWORD PTR [rbp-1168]
  vsqrtpd ymm10, ymm2
  vpermpd ymm12, ymm2, 0
  vmovapd YMMWORD PTR [rbp-208], ymm12
  vmovapd YMMWORD PTR [rbp-784], ymm10
  vsqrtpd ymm10, ymm12
  vmovapd YMMWORD PTR [rbp-912], ymm10
  vpermpd ymm10, ymm2, 85
  vsqrtpd ymm8, ymm10
  vmulpd ymm10, ymm10, ymm2
  vmovapd YMMWORD PTR [rbp-1040], ymm8
  vmovapd ymm0, YMMWORD PTR [rbp-496]
  vpermpd ymm8, ymm2, 170
  vfnmadd213pd ymm0, ymm7, YMMWORD PTR [rdi+128]
  vmulpd ymm3, ymm5, YMMWORD PTR [rbp-176]
  vsqrtpd ymm12, ymm8
  vmovapd YMMWORD PTR [rbp-176], ymm14
  vmulpd ymm8, ymm8, ymm2
  vmulpd ymm0, ymm0, ymm15
  vfmsub132pd ymm0, ymm13, YMMWORD PTR [rbp-656]
  vmulpd ymm0, ymm0, YMMWORD PTR [rbp-80]
  vfmsub132pd ymm0, ymm3, YMMWORD PTR [rbp-752]
  vmovapd ymm3, YMMWORD PTR [rbp-784]
  vmulpd ymm0, ymm0, YMMWORD PTR [rbp-112]
  vfmsub132pd ymm0, ymm11, YMMWORD PTR [rbp-880]
  vmovapd ymm11, YMMWORD PTR [rbp-1136]
  vmulpd ymm0, ymm0, YMMWORD PTR [rbp-144]
  vfmsub132pd ymm0, ymm9, YMMWORD PTR [rbp-1008]
  vmulpd ymm0, ymm0, ymm14
  vmulpd ymm14, ymm5, YMMWORD PTR [rbp-368]
  vfmsub132pd ymm11, ymm8, ymm0
  vmulpd ymm0, ymm4, YMMWORD PTR [rbp-304]
  vmovapd YMMWORD PTR [rbp-304], ymm14
  vmulpd ymm3, ymm3, ymm11
  vmulpd ymm3, ymm3, ymm12
  vmulpd ymm12, ymm1, YMMWORD PTR [rbp-240]
  vmovapd YMMWORD PTR [rbp-240], ymm0
  vmulpd ymm0, ymm6, YMMWORD PTR [rbp-432]
  vpermpd ymm8, ymm3, 0
  vsqrtpd ymm13, ymm3
  vpermpd ymm9, ymm3, 85
  vsqrtpd ymm11, ymm8
  vmovapd ymm14, ymm0
  vmovapd ymm0, YMMWORD PTR [rbp-528]
  vfnmadd213pd ymm0, ymm7, YMMWORD PTR [rdi+64]
  vmovapd YMMWORD PTR [rbp-496], ymm11
  vsqrtpd ymm11, ymm9
  vmulpd ymm9, ymm9, ymm3
  vmulpd ymm0, ymm0, ymm15
  vfmsub132pd ymm0, ymm14, YMMWORD PTR [rbp-624]
  vmovapd ymm14, YMMWORD PTR [rbp-304]
  vmulpd ymm0, ymm0, YMMWORD PTR [rbp-80]
  vfmsub132pd ymm0, ymm14, YMMWORD PTR [rbp-720]
  vmovapd ymm14, YMMWORD PTR [rbp-240]
  vmulpd ymm0, ymm0, YMMWORD PTR [rbp-112]
  vfmsub132pd ymm0, ymm14, YMMWORD PTR [rbp-848]
  vsqrtpd ymm14, ymm2
  vmulpd ymm0, ymm0, YMMWORD PTR [rbp-144]
  vfmsub132pd ymm0, ymm12, YMMWORD PTR [rbp-976]
  vmulpd ymm0, ymm0, YMMWORD PTR [rbp-176]
  vfmsub132pd ymm0, ymm10, YMMWORD PTR [rbp-1104]
  vmulpd ymm6, ymm6, YMMWORD PTR [rbp-464]
  vmulpd ymm5, ymm5, YMMWORD PTR [rbp-400]
  vmulpd ymm4, ymm4, YMMWORD PTR [rbp-336]
  vmulpd ymm1, ymm1, YMMWORD PTR [rbp-272]
  vmulpd ymm0, ymm0, ymm14
  vfmsub132pd ymm0, ymm9, YMMWORD PTR [rbp-1040]
  vmulpd ymm2, ymm2, YMMWORD PTR [rbp-208]
  vmulpd ymm3, ymm8, ymm3
  vmulpd ymm0, ymm0, ymm13
  vmulpd ymm11, ymm0, ymm11
  vpermpd ymm0, ymm11, 0
  vsqrtpd ymm12, ymm11
  vmulpd ymm11, ymm0, ymm11
  vsqrtpd ymm10, ymm0
  vmovapd ymm0, YMMWORD PTR [rbp-560]
  vfnmadd213pd ymm7, ymm0, YMMWORD PTR [rdi]
  vmulpd ymm7, ymm7, ymm15
  vfmsub132pd ymm7, ymm6, YMMWORD PTR [rbp-592]
  vmulpd ymm7, ymm7, YMMWORD PTR [rbp-80]
  vfmsub132pd ymm7, ymm5, YMMWORD PTR [rbp-688]
  vmulpd ymm7, ymm7, YMMWORD PTR [rbp-112]
  vfmsub132pd ymm7, ymm4, YMMWORD PTR [rbp-816]
  vmulpd ymm0, ymm7, YMMWORD PTR [rbp-144]
  vfmsub132pd ymm0, ymm1, YMMWORD PTR [rbp-944]
  vmulpd ymm0, ymm0, YMMWORD PTR [rbp-176]
  vfmsub132pd ymm0, ymm2, YMMWORD PTR [rbp-1072]
  vmulpd ymm0, ymm0, ymm14
  vfmsub132pd ymm0, ymm3, YMMWORD PTR [rbp-912]
  vmulpd ymm0, ymm0, ymm13
  vfmsub132pd ymm0, ymm11, YMMWORD PTR [rbp-496]
  vmulpd ymm0, ymm0, ymm12
  vmulpd ymm0, ymm0, ymm10
  vzeroupper
  add rsp, 1040
  pop r10
  pop rbp
  lea rsp, [r10-8]
  ret
[/out]

Reply via email to