https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102552
Bug ID: 102552 Summary: i386 failing tests after r12-4038-g6de9f0c13b27c34336587da19d03200f8cc6bcd5 Product: gcc Version: 12.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: target Assignee: unassigned at gcc dot gnu.org Reporter: marxin at gcc dot gnu.org Target Milestone: --- The following 2 tests fail after my revision: FAIL: gcc.target/i386/avx512fp16-reduce-op-2.c scan-tree-dump-times optimized ".REDUC_MAX" 3 FAIL: gcc.target/i386/avx512fp16-reduce-op-2.c scan-tree-dump-times optimized ".REDUC_MIN" 3 FAIL: gcc.target/i386/avx512fp16-reduce-op-2.c scan-tree-dump-times optimized ".REDUC_PLUS" 3 FAIL: gcc.target/i386/intrinsics_4.c (test for excess errors) Strangely, the failures do not reproduce on all x86_64 systems. On one box the loop is vectorized into a .REDUC_PLUS call, as the test expects: $ cat reduc.c _Float16 __attribute__((noipa, target("avx512fp16,avx512vl"), optimize("Ofast"))) reduc_add_128 (_Float16* p) { _Float16 sum = 0; for (int i = 0; i != 8; i++) sum += p[i]; return sum; } $ ./xgcc -B. reduc.c -Ofast -fdump-tree-optimized=/dev/stdout ;; Function reduc_add_128 (reduc_add_128, funcdef_no=0, decl_uid=1978, cgraph_uid=1, symbol_order=0) __attribute__((optimize ("Ofast"), target ("avx512fp16,avx512vl"), noipa, noinline, noclone, no_icf)) _Float16 reduc_add_128 (_Float16 * p) { vector(8) _Float16 vect__4.6; _Float16 _24; <bb 2> [local count: 119292720]: vect__4.6_5 = MEM <vector(8) _Float16> [(_Float16 *)p_9(D)]; _24 = .REDUC_PLUS (vect__4.6_5); [tail call] return _24; } while on a different box the same command produces scalar additions with no .REDUC_PLUS: $ ./xgcc -B.
reduc.c -Ofast -fdump-tree-optimized=/dev/stdout ;; Function reduc_add_128 (reduc_add_128, funcdef_no=0, decl_uid=1978, cgraph_uid=1, symbol_order=0) __attribute__((optimize ("Ofast"), target ("avx512fp16,avx512vl"), noipa, noinline, noclone, no_icf)) _Float16 reduc_add_128 (_Float16 * p) { _Float16 sum; _Float16 _4; _Float16 _5; _Float16 _6; _Float16 _7; _Float16 _12; _Float16 _13; _Float16 _22; _Float16 _23; _Float16 _30; _Float16 _40; _Float16 _50; _Float16 _60; _Float16 _70; _Float16 _80; <bb 2> [local count: 119292720]: _6 = *p_9(D); _30 = MEM[(_Float16 *)p_9(D) + 2B]; _40 = MEM[(_Float16 *)p_9(D) + 4B]; _13 = _30 + _40; _50 = MEM[(_Float16 *)p_9(D) + 6B]; _60 = MEM[(_Float16 *)p_9(D) + 8B]; _12 = _50 + _60; _22 = _12 + _13; _70 = MEM[(_Float16 *)p_9(D) + 10B]; _80 = MEM[(_Float16 *)p_9(D) + 12B]; _7 = _70 + _80; _4 = MEM[(_Float16 *)p_9(D) + 14B]; _5 = _4 + _6; _23 = _5 + _7; sum_10 = _22 + _23; return sum_10; }