Hi! On 2023-12-13T08:14:28+0000, Di Zhao OS <diz...@os.amperecomputing.com> wrote: > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/pr110279-2.c > @@ -0,0 +1,41 @@ > +/* PR tree-optimization/110279 */ > +/* { dg-do compile } */ > +/* { dg-options "-Ofast --param tree-reassoc-width=4 --param > fully-pipelined-fma=1 -fdump-tree-reassoc2-details -fdump-tree-optimized" } */ > +/* { dg-additional-options "-march=armv8.2-a" { target aarch64-*-* } } */ > + > +#define LOOP_COUNT 800000000 > +typedef double data_e; > + > +#include <stdio.h> > + > +__attribute_noinline__ data_e > +foo (data_e in)
Pushed to master branch commit 91e9e8faea4086b3b8aef2355fc12c1559d425f6 "Fix 'gcc.dg/pr110279-2.c' syntax error due to '__attribute_noinline__'", see attached. However: > +{ > + data_e a1, a2, a3, a4; > + data_e tmp, result = 0; > + a1 = in + 0.1; > + a2 = in * 0.1; > + a3 = in + 0.01; > + a4 = in * 0.59; > + > + data_e result2 = 0; > + > + for (int ic = 0; ic < LOOP_COUNT; ic++) > + { > + /* Test that a complete FMA chain with length=4 is not broken. */ > + tmp = a1 + a2 * a2 + a3 * a3 + a4 * a4 ; > + result += tmp - ic; > + result2 = result2 / 2 - tmp; > + > + a1 += 0.91; > + a2 += 0.1; > + a3 -= 0.01; > + a4 -= 0.89; > + > + } > + > + return result + result2; > +} > + > +/* { dg-final { scan-tree-dump-not "was chosen for reassociation" > "reassoc2"} } */ > +/* { dg-final { scan-tree-dump-times {\.FMA } 3 "optimized"} } */ ..., I still see these latter two tree dump scans FAIL, for GCN: $ grep -C2 'was chosen for reassociation' pr110279-2.c.197t.reassoc2 2 *: a3_40 2 *: a2_39 Width = 4 was chosen for reassociation Transforming _15 = powmult_1 + powmult_3; into _63 = powmult_1 + a1_38; $ grep -F .FMA pr110279-2.c.265t.optimized _63 = .FMA (a2_39, a2_39, a1_38); _64 = .FMA (a3_40, a3_40, powmult_5); ..., nvptx: $ grep -C2 'was chosen for reassociation' pr110279-2.c.197t.reassoc2 2 *: a3_40 2 *: a2_39 Width = 4 was chosen for reassociation Transforming _15 = powmult_1 + powmult_3; into _63 = powmult_1 + a1_38; $ grep -F .FMA pr110279-2.c.265t.optimized _63 = .FMA (a2_39, a2_39, a1_38); _64 = .FMA (a3_40, a3_40, powmult_5); ..., but also x86_64-pc-linux-gnu: $ grep -C2 'was chosen for reassociation' pr110279-2.c.197t.reassoc2 2 *: a3_40 2 *: a2_39 Width = 2 was chosen for reassociation Transforming _15 = powmult_1 + powmult_3; into _63 = powmult_1 + powmult_3; $ grep -cF .FMA pr110279-2.c.265t.optimized 0 Grüße Thomas ----------------- Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; 
Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
From 91e9e8faea4086b3b8aef2355fc12c1559d425f6 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge <tho...@codesourcery.com> Date: Fri, 15 Dec 2023 10:03:12 +0100 Subject: [PATCH] Fix 'gcc.dg/pr110279-2.c' syntax error due to '__attribute_noinline__' For example, for GCN or nvptx target configurations, using newlib: FAIL: gcc.dg/pr110279-2.c (test for excess errors) UNRESOLVED: gcc.dg/pr110279-2.c scan-tree-dump-not reassoc2 "was chosen for reassociation" UNRESOLVED: gcc.dg/pr110279-2.c scan-tree-dump-times optimized "\\.FMA " 3 [...]/source-gcc/gcc/testsuite/gcc.dg/pr110279-2.c:11:1: error: unknown type name '__attribute_noinline__' [...]/source-gcc/gcc/testsuite/gcc.dg/pr110279-2.c:12:1: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'foo' We cannot assume 'stdio.h' to define '__attribute_noinline__' -- but then, that also isn't necessary for this test case (there is nothing to inline into). gcc/testsuite/ * gcc.dg/pr110279-2.c: Don't '#include <stdio.h>'. Remove '__attribute_noinline__'. --- gcc/testsuite/gcc.dg/pr110279-2.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.dg/pr110279-2.c b/gcc/testsuite/gcc.dg/pr110279-2.c index 0304a77aa66..b6b69969c6b 100644 --- a/gcc/testsuite/gcc.dg/pr110279-2.c +++ b/gcc/testsuite/gcc.dg/pr110279-2.c @@ -6,9 +6,7 @@ #define LOOP_COUNT 800000000 typedef double data_e; -#include <stdio.h> - -__attribute_noinline__ data_e +data_e foo (data_e in) { data_e a1, a2, a3, a4; -- 2.34.1