https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102552

            Bug ID: 102552
           Summary: i386 failing tests after
                    r12-4038-g6de9f0c13b27c34336587da19d03200f8cc6bcd5
           Product: gcc
           Version: 12.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: marxin at gcc dot gnu.org
  Target Milestone: ---

The following 2 tests fail after my revision:

FAIL: gcc.target/i386/avx512fp16-reduce-op-2.c scan-tree-dump-times optimized
".REDUC_MAX" 3
FAIL: gcc.target/i386/avx512fp16-reduce-op-2.c scan-tree-dump-times optimized
".REDUC_MIN" 3
FAIL: gcc.target/i386/avx512fp16-reduce-op-2.c scan-tree-dump-times optimized
".REDUC_PLUS" 3
FAIL: gcc.target/i386/intrinsics_4.c (test for excess errors)

Strangely, the failure does not happen on all x86_64 systems. On one box the
reduction is vectorized as the test expects:

$ cat reduc.c
_Float16
__attribute__((noipa, target("avx512fp16,avx512vl"), optimize("Ofast")))
reduc_add_128 (_Float16* p)
{
  _Float16 sum = 0;
  for (int i = 0; i != 8; i++)
    sum += p[i];
  return sum;
}

$ ./xgcc -B. reduc.c -Ofast -fdump-tree-optimized=/dev/stdout

;; Function reduc_add_128 (reduc_add_128, funcdef_no=0, decl_uid=1978,
cgraph_uid=1, symbol_order=0)

__attribute__((optimize ("Ofast"), target ("avx512fp16,avx512vl"), noipa,
noinline, noclone, no_icf))
_Float16 reduc_add_128 (_Float16 * p)
{
  vector(8) _Float16 vect__4.6;
  _Float16 _24;

  <bb 2> [local count: 119292720]:
  vect__4.6_5 = MEM <vector(8) _Float16> [(_Float16 *)p_9(D)];
  _24 = .REDUC_PLUS (vect__4.6_5); [tail call]
  return _24;

}

while on a different box the failure does happen (no .REDUC_PLUS is emitted):

$ ./xgcc -B. reduc.c -Ofast  -fdump-tree-optimized=/dev/stdout

;; Function reduc_add_128 (reduc_add_128, funcdef_no=0, decl_uid=1978,
cgraph_uid=1, symbol_order=0)

__attribute__((optimize ("Ofast"), target ("avx512fp16,avx512vl"), noipa,
noinline, noclone, no_icf))
_Float16 reduc_add_128 (_Float16 * p)
{
  _Float16 sum;
  _Float16 _4;
  _Float16 _5;
  _Float16 _6;
  _Float16 _7;
  _Float16 _12;
  _Float16 _13;
  _Float16 _22;
  _Float16 _23;
  _Float16 _30;
  _Float16 _40;
  _Float16 _50;
  _Float16 _60;
  _Float16 _70;
  _Float16 _80;

  <bb 2> [local count: 119292720]:
  _6 = *p_9(D);
  _30 = MEM[(_Float16 *)p_9(D) + 2B];
  _40 = MEM[(_Float16 *)p_9(D) + 4B];
  _13 = _30 + _40;
  _50 = MEM[(_Float16 *)p_9(D) + 6B];
  _60 = MEM[(_Float16 *)p_9(D) + 8B];
  _12 = _50 + _60;
  _22 = _12 + _13;
  _70 = MEM[(_Float16 *)p_9(D) + 10B];
  _80 = MEM[(_Float16 *)p_9(D) + 12B];
  _7 = _70 + _80;
  _4 = MEM[(_Float16 *)p_9(D) + 14B];
  _5 = _4 + _6;
  _23 = _5 + _7;
  sum_10 = _22 + _23;
  return sum_10;

}

Reply via email to