https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83479

--- Comment #4 from Daniel Fruzynski <bugzi...@poradnik-webmastera.com> ---
Rule No.1: never log bugs before morning coffee ;)

The following code does not produce warnings when compiled with "-O3 -march=haswell -mavx512f
-mavx512vl -mavx512bw -mavx512dq -mavx512cd -Wall -Werror":
[code]
#include "immintrin.h"

/*
 * Fully unrolled 8-stage update over a 9x8 matrix of doubles, one 512-bit
 * vector (8 doubles) per row.
 *
 * Rows 0..7 are loaded into v1..v8; row 8 (the last row of `data`) is the
 * reference row for the first stage.  In each stage, with vLastRow as the
 * reference row, every still-active vector v(i+1) is replaced by
 *
 *     (v - vLastRow * bcast(vLastRow[i])) * sqrt(vLastRow) * sqrt(bcast(vLastRow[i]))
 *
 * where bcast() replicates one lane across the whole vector.  After a stage,
 * the highest-numbered vector that was just updated becomes the reference
 * row for the next stage and is not updated again.
 *
 * data:   9 rows of 8 doubles.  Every row is read with _mm512_load_pd, the
 *         aligned load, so each row must start on a 64-byte boundary.
 * return: lane 0 of v1 after the final stage.
 *
 * Relies on the GCC/Clang vector extensions (arithmetic operators and
 * subscripting on __m512d) and on AVX-512F being enabled.
 */
double test(const double data[9][8])
{
  __m512d vLastRow, vLastCol, vSqrtRow, vSqrtCol;

  __m512d v1 = _mm512_load_pd (&data[0][0]);
  __m512d v2 = _mm512_load_pd (&data[1][0]);
  __m512d v3 = _mm512_load_pd (&data[2][0]);
  __m512d v4 = _mm512_load_pd (&data[3][0]);
  __m512d v5 = _mm512_load_pd (&data[4][0]);
  __m512d v6 = _mm512_load_pd (&data[5][0]);
  __m512d v7 = _mm512_load_pd (&data[6][0]);
  __m512d v8 = _mm512_load_pd (&data[7][0]);

  // 8: reference row is data[8], the last valid row (valid indices are 0..8;
  // index 9 would read past the end of the declared array).
  vLastRow = _mm512_load_pd (&data[8][0]);
  vSqrtRow = _mm512_sqrt_pd(vLastRow);

  vLastCol = _mm512_set1_pd(vLastRow[0]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[1]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[2]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v3 = (v3 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[3]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v4 = (v4 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[4]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v5 = (v5 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[5]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v6 = (v6 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[6]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v7 = (v7 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[7]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v8 = (v8 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // 7: reference row is the freshly updated v8; update v1..v7.
  vLastRow = v8;
  vSqrtRow = _mm512_sqrt_pd(vLastRow);

  vLastCol = _mm512_set1_pd(vLastRow[0]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[1]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[2]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v3 = (v3 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[3]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v4 = (v4 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[4]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v5 = (v5 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[5]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v6 = (v6 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[6]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v7 = (v7 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // 6: reference row is v7; update v1..v6.
  vLastRow = v7;
  vSqrtRow = _mm512_sqrt_pd(vLastRow);

  vLastCol = _mm512_set1_pd(vLastRow[0]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[1]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[2]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v3 = (v3 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[3]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v4 = (v4 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[4]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v5 = (v5 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[5]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v6 = (v6 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // 5: reference row is v6; update v1..v5.
  vLastRow = v6;
  vSqrtRow = _mm512_sqrt_pd(vLastRow);

  vLastCol = _mm512_set1_pd(vLastRow[0]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[1]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[2]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v3 = (v3 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[3]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v4 = (v4 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[4]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v5 = (v5 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // 4: reference row is v5; update v1..v4.
  vLastRow = v5;
  vSqrtRow = _mm512_sqrt_pd(vLastRow);

  vLastCol = _mm512_set1_pd(vLastRow[0]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[1]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[2]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v3 = (v3 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[3]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v4 = (v4 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // 3: reference row is v4; update v1..v3.
  vLastRow = v4;
  vSqrtRow = _mm512_sqrt_pd(vLastRow);

  vLastCol = _mm512_set1_pd(vLastRow[0]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[1]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[2]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v3 = (v3 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // 2: reference row is v3; update v1..v2.
  vLastRow = v3;
  vSqrtRow = _mm512_sqrt_pd(vLastRow);

  vLastCol = _mm512_set1_pd(vLastRow[0]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm512_set1_pd(vLastRow[1]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // 1: reference row is v2; update v1 only.
  vLastRow = v2;
  vSqrtRow = _mm512_sqrt_pd(vLastRow);

  vLastCol = _mm512_set1_pd(vLastRow[0]);
  vSqrtCol = _mm512_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  return v1[0];
}
[/code]

Reply via email to