[committed] Fix handling of lastprivate clauses on omp for simd reduction (inscan, ...) loops
Hi! lastprivate or conditional lastprivate could be modified either in the input phase, or in the scan phase (but not both), and as we don't really know in which one it is, we need to copy the value from the first simd into simd lanes of the second simd. Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. 2019-07-06 Jakub Jelinek * omp-low.c (lower_rec_input_clauses): For lastprivate clauses in ctx->for_simd_scan_phase simd copy the outer var to the privatized variable(s). For conditional lastprivate look through outer GIMPLE_OMP_SCAN context. (lower_omp_1): For conditional lastprivate look through outer GIMPLE_OMP_SCAN context. * testsuite/libgomp.c/scan-19.c: New test. * testsuite/libgomp.c/scan-20.c: New test. --- gcc/omp-low.c.jj2019-07-06 16:48:02.373495843 +0200 +++ gcc/omp-low.c 2019-07-06 18:36:32.268367658 +0200 @@ -5006,6 +5006,17 @@ lower_rec_input_clauses (tree clauses, g lower_omp (, ctx->outer); gimple_seq_add_seq ([1], tseq); } + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE + && ctx->for_simd_scan_phase) + { + x = unshare_expr (ivar); + tree orig_v + = build_outer_var_ref (var, ctx, + OMP_CLAUSE_LASTPRIVATE); + x = lang_hooks.decls.omp_clause_assign_op (c, x, +orig_v); + gimplify_and_add (x, [0]); + } if (y) { y = lang_hooks.decls.omp_clause_dtor (c, ivar); @@ -5035,6 +5046,16 @@ lower_rec_input_clauses (tree clauses, g } if (nx) gimplify_and_add (nx, ilist); + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE + && is_simd + && ctx->for_simd_scan_phase) + { + tree orig_v = build_outer_var_ref (var, ctx, +OMP_CLAUSE_LASTPRIVATE); + x = lang_hooks.decls.omp_clause_assign_op (c, new_var, +orig_v); + gimplify_and_add (x, ilist); + } /* FALLTHRU */ do_dtor: @@ -5709,11 +5730,12 @@ lower_rec_input_clauses (tree clauses, g && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c)) { tree o = lookup_decl (OMP_CLAUSE_DECL (c), ctx); - tree *v - = ctx->lastprivate_conditional_map->get (o); - tree po = lookup_decl (OMP_CLAUSE_DECL (c), ctx->outer); - tree *pv - = ctx->outer->lastprivate_conditional_map->get (po); + omp_context *outer = ctx->outer; + if (gimple_code (outer->stmt) == GIMPLE_OMP_SCAN) + outer = outer->outer; + tree *v = ctx->lastprivate_conditional_map->get (o); + tree po = lookup_decl (OMP_CLAUSE_DECL (c), outer); + tree *pv = outer->lastprivate_conditional_map->get (po); *v = *pv; } } @@ -12421,7 +12443,11 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p { tree clauses; if (up->combined_into_simd_safelen1) - up = up->outer; + { + up = up->outer; + if (gimple_code (up->stmt) == GIMPLE_OMP_SCAN) + up = up->outer; + } if (gimple_code (up->stmt) == GIMPLE_OMP_FOR) clauses = gimple_omp_for_clauses (up->stmt); else --- libgomp/testsuite/libgomp.c/scan-19.c.jj2019-07-06 11:12:15.284732446 +0200 +++ libgomp/testsuite/libgomp.c/scan-19.c 2019-07-06 19:23:18.189268880 +0200 @@ -0,0 +1,119 @@ +/* { dg-require-effective-target size32plus } */ +/* { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target sse2_runtime } } } */ + +extern void abort (void); +int r, a[1024], b[1024], x, y, z; + +__attribute__((noipa)) void +foo (int *a, int *b) +{ + #pragma omp for simd reduction (inscan, +:r) lastprivate (conditional: z) firstprivate (x) private (y) + for (int i = 0; i < 1024; i++) +{ + { y = a[i]; r += y + x + 12; } + #pragma omp scan inclusive(r) + { b[i] = r; if ((i & 1) == 0 && i < 937) z = r; } +} +} + +__attribute__((noipa)) int +bar (void) +{
[committed] Fix up omp for simd lastprivate (conditional:...) simdlen(1) handling
Hi! It isn't entirely clear to me why all the tests in the testsuite happen to pass, but in the simdlen(1)/if(simd:0) for simd loops lastprivate conditional variables private in the simd need to be copied to the private variables in the worksharing loop unconditionally at the end of the simd loop, not only at the end of simd loop which ran the last iteration. FIxed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. 2019-07-06 Jakub Jelinek * omp-low.c (struct omp_context): Rename combined_into_simd_safelen0 member to combined_into_simd_safelen1. (lower_rec_input_clauses, lower_omp_1): Adjust uses. (lower_lastprivate_clauses): Likewise. For conditional lastprivate clauses if ctx->combined_into_simd_safelen1 put statements after the predicate conditionalized block rather than into it. --- gcc/omp-low.c.jj2019-07-06 09:51:48.363290048 +0200 +++ gcc/omp-low.c 2019-07-06 16:48:02.373495843 +0200 @@ -140,7 +140,7 @@ struct omp_context /* True if lower_omp_1 should look up lastprivate conditional in parent context. */ - bool combined_into_simd_safelen0; + bool combined_into_simd_safelen1; /* True if there is nested scan context with inclusive clause. */ bool scan_inclusive; @@ -5703,7 +5703,7 @@ lower_rec_input_clauses (tree clauses, g if (gimple_omp_for_combined_into_p (ctx->stmt)) { /* Signal to lower_omp_1 that it should use parent context. */ - ctx->combined_into_simd_safelen0 = true; + ctx->combined_into_simd_safelen1 = true; for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c)) if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c)) @@ -6018,6 +6018,7 @@ lower_lastprivate_clauses (tree clauses, bool par_clauses = false; tree simduid = NULL, lastlane = NULL, simtcond = NULL, simtlast = NULL; unsigned HOST_WIDE_INT conditional_off = 0; + gimple_seq post_stmt_list = NULL; /* Early exit if there are no lastprivate or linear clauses. */ for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses)) @@ -6107,7 +6108,7 @@ lower_lastprivate_clauses (tree clauses, if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c) && ctx->lastprivate_conditional_map - && !ctx->combined_into_simd_safelen0) + && !ctx->combined_into_simd_safelen1) { gcc_assert (body_p); if (simduid) @@ -6144,6 +6145,12 @@ lower_lastprivate_clauses (tree clauses, gimple_seq_add_stmt (this_stmt_list, gimple_build_label (lab1)); gimplify_assign (mem2, v, this_stmt_list); } + else if (predicate + && ctx->combined_into_simd_safelen1 + && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE + && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c) + && ctx->lastprivate_conditional_map) + this_stmt_list = _stmt_list; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE || (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR @@ -6274,6 +6281,7 @@ lower_lastprivate_clauses (tree clauses, if (label) gimple_seq_add_stmt (stmt_list, gimple_build_label (label)); + gimple_seq_add_seq (stmt_list, post_stmt_list); } /* Lower the OpenACC reductions of CLAUSES for compute axis LEVEL @@ -12412,7 +12420,7 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p if (tree *v = up->lastprivate_conditional_map->get (lhs)) { tree clauses; - if (up->combined_into_simd_safelen0) + if (up->combined_into_simd_safelen1) up = up->outer; if (gimple_code (up->stmt) == GIMPLE_OMP_FOR) clauses = gimple_omp_for_clauses (up->stmt); Jakub
Re: [PATCH] Fix ODR violations in code using
On 05/07/19 19:44 +0100, Jonathan Wakely wrote: On 05/07/19 20:23 +0200, Daniel Krügler wrote: Am Fr., 5. Juli 2019 um 18:13 Uhr schrieb Jonathan Wakely : [..] I decided against the simplification in the second patch, and committed the attached one which is closer to the first patch I sent (preserving the __atomic_add and __exchange_and_add functions even when they just call the built-ins). Tested x86_64-linux, powerpc64-linux, powerpc-aix. Committed to trunk. Unrelated to the actual patch, I noticed some explicit "throw()" forms used as exception specifications - shouldn't these be replaced by either explicit "noexcept" or at least by a library macro that expands to one or the other? Yes, they should be _GLIBCXX_NOTHROW. (I'm sorry, if such unrelated questions are considered as inappropriate for this list). Entirely appropriate, thanks! Here's a patch to fix that and the dumb mistake I made in __atomic_add_dispatch. I'll commit after testing finishes. commit 1542776ebab8848109d125d05a548fca5efb513a Author: Jonathan Wakely Date: Sat Jul 6 21:24:51 2019 +0100 Fix recent regression in __atomic_add_dispatch * include/ext/atomicity.h (__exchange_and_add, __atomic_add): Replace throw() with _GLIBCXX_NOTHROW. (__atomic_add_dispatch): Return after performing atomic increment. diff --git a/libstdc++-v3/include/ext/atomicity.h b/libstdc++-v3/include/ext/atomicity.h index 73225b3de20..333c8843e14 100644 --- a/libstdc++-v3/include/ext/atomicity.h +++ b/libstdc++-v3/include/ext/atomicity.h @@ -55,10 +55,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { __atomic_fetch_add(__mem, __val, __ATOMIC_ACQ_REL); } #else _Atomic_word - __exchange_and_add(volatile _Atomic_word*, int) throw (); + __exchange_and_add(volatile _Atomic_word*, int) _GLIBCXX_NOTHROW; void - __atomic_add(volatile _Atomic_word*, int) throw (); + __atomic_add(volatile _Atomic_word*, int) _GLIBCXX_NOTHROW; #endif inline _Atomic_word @@ -92,7 +92,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { #ifdef __GTHREADS if (__gthread_active_p()) - __atomic_add(__mem, __val); + { + __atomic_add(__mem, __val); + return; + } #endif __atomic_add_single(__mem, __val); }
Re: [PATCH] Add generic support for "noinit" attribute
On 7/4/19 9:27 AM, Christophe Lyon wrote: Hi, Similar to what already exists for TI msp430 or in TI compilers for arm, this patch adds support for the "noinit" attribute. It is convenient for embedded targets where the user wants to keep the value of some data when the program is restarted: such variables are not zero-initialized. It is mostly a helper/shortcut to placing variables in a dedicated section. It's probably desirable to add the following chunk to the GNU linker: diff --git a/ld/emulparams/armelf.sh b/ld/emulparams/armelf.sh index 272a8bc..9555cec 100644 --- a/ld/emulparams/armelf.sh +++ b/ld/emulparams/armelf.sh @@ -10,7 +10,19 @@ OTHER_TEXT_SECTIONS='*(.glue_7t) *(.glue_7) *(.vfp11_veneer) *(.v4_bx)' OTHER_BSS_SYMBOLS="${CREATE_SHLIB+PROVIDE (}__bss_start__ = .${CREATE_SHLIB+)};" OTHER_BSS_END_SYMBOLS="${CREATE_SHLIB+PROVIDE (}_bss_end__ = .${CREATE_SHLIB+)}; ${CREATE_SHLIB+PROVIDE (}__bss_end__ = .${CREATE_SHLIB+)};" OTHER_END_SYMBOLS="${CREATE_SHLIB+PROVIDE (}__end__ = .${CREATE_SHLIB+)};" -OTHER_SECTIONS='.note.gnu.arm.ident 0 : { KEEP (*(.note.gnu.arm.ident)) }' +OTHER_SECTIONS=' +.note.gnu.arm.ident 0 : { KEEP (*(.note.gnu.arm.ident)) } + /* This section contains data that is not initialised during load + *or* application reset. */ + .noinit (NOLOAD) : + { + . = ALIGN(2); + PROVIDE (__noinit_start = .); + *(.noinit) + . = ALIGN(2); + PROVIDE (__noinit_end = .); + } +' so that the noinit section has the "NOLOAD" flag. I'll submit that part separately to the binutils project if OK. However, I'm not sure for which other targets (beyond arm), I should update the linker scripts. I left the new testcase in gcc.target/arm, guarded by dg-do run { target { *-*-eabi } } but since this attribute is now generic, I suspect I should move the test to some other place. But then, which target selector should I use? Finally, I tested on arm-eabi, but not on msp430 for which I do not have the environment, so advice from msp430 maintainers is appreciated. Since msp430 does not use the same default helpers as arm, I left the "noinit" handling code in place, to avoid changing other generic functions which I don't know how to test (default_select_section, default_section_type_flags). Since the section attribute is mutually exclusive with noinit, defining an attribute_exclusions array with the mutually exclusive attributes and setting the last member of the corresponding c_common_attribute_table element to that array (and updating the arrays for the other mutually exclusive attributes) would be the expected way to express that constraint: + { "noinit", 0, 0, true, false, false, false, + handle_noinit_attribute, NULL }, This should also make it possible to remove the explicit handling from handle_section_attribute and handle_noinit_attribute. If that's not completely possible then in the following please be sure to quote the names of the attributes: @@ -1912,6 +1915,13 @@ handle_section_attribute (tree *node, tree ARG_UNUSED (name), tree args, goto fail; } + if (DECL_P (decl) && lookup_attribute ("noinit", DECL_ATTRIBUTES (decl)) != NULL_TREE) +{ + warning_at (DECL_SOURCE_LOCATION (decl), OPT_Wattributes, + "section attribute cannot be applied to variables with noinit attribute"); Martin Thanks, Christophe gcc/ChangeLog: 2019-07-04 Christophe Lyon * doc/extend.texi: Add "noinit" attribute documentation. * varasm.c (default_section_type_flags): Add support for "noinit" section. (default_elf_select_section): Add support for "noinit" attribute. gcc/c-family/ChangeLog: 2019-07-04 Christophe Lyon * c-attribs.c (c_common_attribute_table): Add "noinit" entry. (handle_section_attribute): Add support for "noinit" attribute. (handle_noinit_attribute): New function. gcc/config/ChangeLog: 2019-07-04 Christophe Lyon * msp430/msp430.c (msp430_attribute_table): Remove "noinit" entry. gcc/testsuite/ChangeLog: 2019-07-04 Christophe Lyon * gcc.target/arm/noinit-attribute.c: New test.
Re: [Patch, fortran] PR91077 - [8/9/10 Regression] Wrong indexing when using a pointer
On Sat, Jul 06, 2019 at 02:29:06PM +0100, Paul Richard Thomas wrote: > As anticipated, 8-branch required a different patch but the difference > was much smaller than anticipated. > > Bootstrapped and regetested on FC29/x86_64 - OK for 8-branch? > OK for both patches. -- Steve
Re: PR91068: Fix MIPS fallout from IRA matched operand changes
On 7/5/19 2:51 AM, Richard Sandiford wrote: > PR91068 is a case in which we have (ignoring non-LRA alternatives): > > [(set (match_operand:SI 0 "register_operand" "=l,d?") > (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "d,d") > (match_operand:SI 2 "register_operand" "d,d")) >(match_operand:SI 3 "register_operand" "0,d"))) >(clobber (match_scratch:SI 4 "=X,l")) >(clobber (match_scratch:SI 5 "=X,"))] > > where the first alternative is one instruction but the second is two. > This is very similar to the case that my recent IRA patches were > supposed to help. The crucial difference is that the cheap > alternative requires a single-register class while the expensive > alternative uses general registers. > > This makes a difference when one of operand 0 or 3 can naturally be > allocated to LO but the other can't. If IRA makes that allocation, > both alternatives require one reload of equal cost and so the first > alternative clearly wins. > > However, if we say that tying operands 0 and 3 saves the cost of a full > move, then all other things being equal, IRA will prefer to allocate > both registers to the same GPR. The registers will then naturally > fit the second alternative. > > This has a more drastic effect in the MIPS case than it should because > using the GPR alternative is much more expensive there than it appears > to the RA. But that's really a separate problem and something we were > able to live with before my IRA patch. > > What makes tying less useful here is the fact that the tied register is > a single-register class. I think in those circumstances it's better not > to use tied operands at all and instead use "l" for the inputs. > Allocating the input to LO, and allocating the output to LO, then depend > naturally on class costs. If we decide to allocate at least one of them > to LO, we'll use the cheap alternative, otherwise we'll (correctly) use > the expensive alternative. This effectively restores the situation > before my IRA patch, but this time making the preference on the input > register more explicit. > > I originally wrote the patterns in the early days of IRA, and certainly > well before LRA. I think they were largely influened by reload rather > than RA proper (see the comment above *mul_acc_si, which is all about > the reload behaviour). LRA copes with the two-"l" case just fine. > > The patch may well cause problems for -mno-lra, but I think we should > cull that option anyway. > > Tested on mipsisa64-elf. OK to install? > > Richard > > > 2019-07-05 Richard Sandiford > > gcc/ > PR target/91068 > * config/mips/mips.md (*mul_acc_si, *mul_acc_si_r3900, *macc) > (*msac, *msac_using_macc, *mul_sub_si): Use "l" for input operands > instead of matching them to "l" output operands. Dredging up memories of past work? :) I certainly wonder how much crud we have in the target files that's just not desirable anymore given how far things have moved. But I'm also not terribly inclined to spend the time to figure that out for the embedded targets ;-) OK. Jeff
Re: Fix uninitialised use in mips_split_move
On 7/5/19 2:48 AM, Richard Sandiford wrote: > While testing the fix for PR91068, I hit an rtl checking failure > while building newlib. mips_split_move was decomposing an address that > happened to be symbolic and then tried to access the REGNO of the base > register field, which wasn't initialised but which by chance pointed to > valid memory. > > Tested on mipsisa64-elf. OK to install? > > Richard > > > 2019-07-05 Richard Sandiford > > gcc/ > * config/mips/mips.c (mips_split_move): Zero-initialize addr > and check whether addr.reg is nonnull before using it. OK jeff
Re: [Patch, fortran] PR91077 - [8/9/10 Regression] Wrong indexing when using a pointer
As anticipated, 8-branch required a different patch but the difference was much smaller than anticipated. Bootstrapped and regetested on FC29/x86_64 - OK for 8-branch? Paul 2019-07-06 Paul Thomas PR fortran/91077 * trans-array.c (gfc_conv_scalarized_array_ref) Delete code that gave symbol backend decl for subref arrays. 2019-07-06 Paul Thomas PR fortran/91077 * gfortran.dg/pointer_array_11.f90 : New test. On Sat, 6 Jul 2019 at 11:48, Paul Richard Thomas wrote: > > This problem was caused by the code for scalarized array references to > subref arrays and deferred length variables not obtaining the correct > array descriptor and so getting the array span wrong. As it happens, > the lines, following the deleted part, correctly identify when the > info descriptor is a pointer and provide the span as appropriate. > > Bootstrapped and regtested on FC29/x86_64 - OK for trunk and 9-branch? > 8-branch might be somewhat more difficult to fix but I will give it a > try. This will require a separate submission. > > Paul > > 2019-07-06 Paul Thomas > > PR fortran/91077 > * trans-array.c (gfc_conv_scalarized_array_ref) Delete code > that gave symbol backend decl for subref arrays and deferred > length variables. > > 2019-07-06 Paul Thomas > > PR fortran/91077 > * gfortran.dg/pointer_array_11.f90 : New test. -- "If you can't explain it simply, you don't understand it well enough" - Albert Einstein Index: gcc/fortran/trans-array.c === *** gcc/fortran/trans-array.c (revision 272102) --- gcc/fortran/trans-array.c (working copy) *** gfc_conv_scalarized_array_ref (gfc_se * *** 3422,3431 if (build_class_array_ref (se, base, index)) return; ! if (expr && ((is_subref_array (expr) ! && GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (info->descriptor))) ! || (expr->ts.deferred && (expr->expr_type == EXPR_VARIABLE ! || expr->expr_type == EXPR_FUNCTION decl = expr->symtree->n.sym->backend_decl; /* A pointer array component can be detected from its field decl. Fix --- 3422,3429 if (build_class_array_ref (se, base, index)) return; ! if (expr && (expr->ts.deferred && (expr->expr_type == EXPR_VARIABLE ! || expr->expr_type == EXPR_FUNCTION))) decl = expr->symtree->n.sym->backend_decl; /* A pointer array component can be detected from its field decl. Fix Index: gcc/testsuite/gfortran.dg/pointer_array_11.f90 === *** gcc/testsuite/gfortran.dg/pointer_array_11.f90 (nonexistent) --- gcc/testsuite/gfortran.dg/pointer_array_11.f90 (working copy) *** *** 0 --- 1,90 + ! { dg-do run } + ! + ! Test the fix for PR91077 - both the original test and that in comment #4 of the PR. + ! + ! Contribute by Ygal Klein + ! + program test + implicit none + call original + call comment_4 + contains + subroutine original + integer, parameter :: length = 9 + real(8), dimension(2) :: a, b + integer :: i + type point +real(8) :: x + end type point + + type stored +type(point), dimension(:), allocatable :: np + end type stored + type(stored), dimension(:), pointer :: std =>null() + allocate(std(1)) + allocate(std(1)%np(length)) + std(1)%np(1)%x = 0.3d0 + std(1)%np(2)%x = 0.3555d0 + std(1)%np(3)%x = 0.26782d0 + std(1)%np(4)%x = 0d0 + std(1)%np(5)%x = 1.555d0 + std(1)%np(6)%x = 7.3d0 + std(1)%np(7)%x = 7.8d0 + std(1)%np(8)%x = 6.3d0 + std(1)%np(9)%x = 5.5d0 + !do i = 1, 2 + ! write(*, "('std(1)%np(',i1,')%x = ',1e22.14)") i, std(1)%np(i)%x + !end do + !do i = 1, 2 + ! write(*, "('std(1)%np(1:',i1,') = ',9e22.14)") i, std(1)%np(1:i)%x + !end do + a = std(1)%np(1:2)%x + b = [std(1)%np(1)%x, std(1)%np(2)%x] + !print *,a + !print *,b + if (allocated (std(1)%np)) deallocate (std(1)%np) + if (associated (std)) deallocate (std) + if (norm2(a - b) .gt. 1d-3) stop 1 + end subroutine + + subroutine comment_4 + integer, parameter :: length = 2 + real(8), dimension(length) :: a, b + integer :: i + + type point +real(8) :: x + end type point + + type points +type(point), dimension(:), pointer :: np=>null() + end type points + + type stored +integer :: l +type(points), pointer :: nfpoint=>null() + end type stored + + type(stored), dimension(:), pointer :: std=>null() + + + allocate(std(1)) + allocate(std(1)%nfpoint) + allocate(std(1)%nfpoint%np(length)) + std(1)%nfpoint%np(1)%x = 0.3d0 + std(1)%nfpoint%np(2)%x = 0.3555d0 + + !do i = 1, length + ! write(*, "('std(1)%nfpoint%np(',i1,')%x = ',1e22.14)") i, std(1)%nfpoint%np(i)%x + !end do + !do i = 1, length + ! write(*,
[Patch, fortran] PR91077 - [8/9/10 Regression] Wrong indexing when using a pointer
This problem was caused by the code for scalarized array references to subref arrays and deferred length variables not obtaining the correct array descriptor and so getting the array span wrong. As it happens, the lines, following the deleted part, correctly identify when the info descriptor is a pointer and provide the span as appropriate. Bootstrapped and regtested on FC29/x86_64 - OK for trunk and 9-branch? 8-branch might be somewhat more difficult to fix but I will give it a try. This will require a separate submission. Paul 2019-07-06 Paul Thomas PR fortran/91077 * trans-array.c (gfc_conv_scalarized_array_ref) Delete code that gave symbol backend decl for subref arrays and deferred length variables. 2019-07-06 Paul Thomas PR fortran/91077 * gfortran.dg/pointer_array_11.f90 : New test. Index: gcc/fortran/trans-array.c === *** gcc/fortran/trans-array.c (revision 272089) --- gcc/fortran/trans-array.c (working copy) *** gfc_conv_scalarized_array_ref (gfc_se * *** 3502,3520 return; if (get_CFI_desc (NULL, expr, , ar)) - { decl = build_fold_indirect_ref_loc (input_location, decl); - goto done; - } - - if (expr && ((is_subref_array (expr) - && GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (info->descriptor))) - || (expr->ts.deferred && (expr->expr_type == EXPR_VARIABLE - || expr->expr_type == EXPR_FUNCTION - decl = expr->symtree->n.sym->backend_decl; - - if (decl && GFC_DECL_PTR_ARRAY_P (decl)) - goto done; /* A pointer array component can be detected from its field decl. Fix the descriptor, mark the resulting variable decl and pass it to --- 3502,3508 *** gfc_conv_scalarized_array_ref (gfc_se * *** 3532,3538 decl = info->descriptor; } - done: se->expr = gfc_build_array_ref (base, index, decl); } --- 3520,3525 Index: gcc/testsuite/gfortran.dg/pointer_array_11.f90 === *** gcc/testsuite/gfortran.dg/pointer_array_11.f90 (nonexistent) --- gcc/testsuite/gfortran.dg/pointer_array_11.f90 (working copy) *** *** 0 --- 1,90 + ! { dg-do run } + ! + ! Test the fix for PR91077 - both the original test and that in comment #4 of the PR. + ! + ! Contribute by Ygal Klein + ! + program test + implicit none + call original + call comment_4 + contains + subroutine original + integer, parameter :: length = 9 + real(8), dimension(2) :: a, b + integer :: i + type point +real(8) :: x + end type point + + type stored +type(point), dimension(:), allocatable :: np + end type stored + type(stored), dimension(:), pointer :: std =>null() + allocate(std(1)) + allocate(std(1)%np(length)) + std(1)%np(1)%x = 0.3d0 + std(1)%np(2)%x = 0.3555d0 + std(1)%np(3)%x = 0.26782d0 + std(1)%np(4)%x = 0d0 + std(1)%np(5)%x = 1.555d0 + std(1)%np(6)%x = 7.3d0 + std(1)%np(7)%x = 7.8d0 + std(1)%np(8)%x = 6.3d0 + std(1)%np(9)%x = 5.5d0 + !do i = 1, 2 + ! write(*, "('std(1)%np(',i1,')%x = ',1e22.14)") i, std(1)%np(i)%x + !end do + !do i = 1, 2 + ! write(*, "('std(1)%np(1:',i1,') = ',9e22.14)") i, std(1)%np(1:i)%x + !end do + a = std(1)%np(1:2)%x + b = [std(1)%np(1)%x, std(1)%np(2)%x] + !print *,a + !print *,b + if (allocated (std(1)%np)) deallocate (std(1)%np) + if (associated (std)) deallocate (std) + if (norm2(a - b) .gt. 1d-3) stop 1 + end subroutine + + subroutine comment_4 + integer, parameter :: length = 2 + real(8), dimension(length) :: a, b + integer :: i + + type point +real(8) :: x + end type point + + type points +type(point), dimension(:), pointer :: np=>null() + end type points + + type stored +integer :: l +type(points), pointer :: nfpoint=>null() + end type stored + + type(stored), dimension(:), pointer :: std=>null() + + + allocate(std(1)) + allocate(std(1)%nfpoint) + allocate(std(1)%nfpoint%np(length)) + std(1)%nfpoint%np(1)%x = 0.3d0 + std(1)%nfpoint%np(2)%x = 0.3555d0 + + !do i = 1, length + ! write(*, "('std(1)%nfpoint%np(',i1,')%x = ',1e22.14)") i, std(1)%nfpoint%np(i)%x + !end do + !do i = 1, length + ! write(*, "('std(1)%nfpoint%np(1:',i1,')%x = ',2e22.14)") i, std(1)%nfpoint%np(1:i)%x + !end do + a = std(1)%nfpoint%np(1:2)%x + b = [std(1)%nfpoint%np(1)%x, std(1)%nfpoint%np(2)%x] + if (associated (std(1)%nfpoint%np)) deallocate (std(1)%nfpoint%np) + if (associated (std(1)%nfpoint)) deallocate (std(1)%nfpoint) + if (associated (std)) deallocate (std) + if (norm2(a - b) .gt. 1d-3) stop 2 + end subroutine + end program test
Re: [range-ops] patch 05/04: bonus round!
On 7/3/19 7:12 PM, Jeff Law wrote: On 7/1/19 4:24 AM, Aldy Hernandez wrote: This is completely unrelated to range-ops itself, but may yield better results in value_range intersections. It's just something I found while working on VRP, and have been dragging around on our branch. If we know the intersection of two ranges is the empty set, there is no need to conservatively add anything to the result. Tested on x86-64 Linux with --enable-languages=all. Aldy range-ops-intersect-undefined.patch commit 4f9aa7bd1066267eee92f622ff29d78534158e20 Author: Aldy Hernandez Date: Fri Jun 28 11:34:19 2019 +0200 Do not try to further refine a VR_UNDEFINED result when intersecting value_ranges. diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 01fb97cedb2..b0d78ee6871 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2019-07-01 Aldy Hernandez + + * tree-vrp.c (intersect_ranges): If we know the intersection is + empty, there is no need to conservatively add anything else to + the set. Do we have a test where this improves the code or at least the computed ranges? I have long since lost the testcase, but a little hacking around to dump all the cases where we are trying to conservatively refine a VR_UNDEFINED, yields tons of hits. See attached hack. By far, the most common is the intersection of ~[0,0] and [0,0], which yields VR_UNDEFINED. We then conservatively drop to VR1, which is [0,0], basically pessimizing the result. Other examples include: unsigned char [38, +INF], unsigned char [22, 22] unsigned char [4, 4], unsigned char [1, 1] unsigned char [4, 4], unsigned char [2, 2] unsigned int [0, 64], unsigned int [128, 128] unsigned int [0, 6], unsigned int [4294967275, 4294967275] unsigned int ~[35, 35], unsigned int [35, 35] unsigned int [46, 52], unsigned int [25, 25] etc Within 7 minutes of building a compiler (up to stage2), this incorrect refinement of VR_UNDEFINED has been triggered 29000 times. If we know it's VR_UNDEFINED (the empty set), I think we should leave it empty :). Aldy commit f9f6ae7188d0cf45d817f46e46b34c639d29993f Author: Aldy Hernandez Date: Thu Jul 4 11:21:28 2019 +0200 WIP: UNDEFINED refine diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c index 5caea6d3593..5390a916d2f 100644 --- a/gcc/tree-vrp.c +++ b/gcc/tree-vrp.c @@ -6150,6 +6150,9 @@ intersect_ranges (enum value_range_kind *vr0type, enum value_range_kind vr1type, tree vr1min, tree vr1max) { + value_range_base t0 (*vr0type, *vr0min, *vr0max); + value_range_base t1 (vr1type, vr1min, vr1max); + bool mineq = vrp_operand_equal_p (*vr0min, vr1min); bool maxeq = vrp_operand_equal_p (*vr0max, vr1max); @@ -6445,10 +6448,12 @@ intersect_ranges (enum value_range_kind *vr0type, gcc_unreachable (); } +#if 0 /* If we know the intersection is empty, there's no need to conservatively add anything else to the set. */ if (*vr0type == VR_UNDEFINED) return; +#endif /* As a fallback simply use { *VRTYPE, *VR0MIN, *VR0MAX } as result for the intersection. That's always a conservative @@ -6458,6 +6463,16 @@ intersect_ranges (enum value_range_kind *vr0type, && is_gimple_min_invariant (vr1min) && vrp_operand_equal_p (vr1min, vr1max)) { + if (*vr0type == VR_UNDEFINED) + { + FILE *out = fopen("/tmp/asdf.aldy", "a"); + fprintf(out, "intersect_ranges about to refine VR_UNDEFINED\n"); + t0.dump(out); + fprintf(out, ","); + t1.dump(out); + fprintf(out, "\n"); + fclose(out); + } *vr0type = vr1type; *vr0min = vr1min; *vr0max = vr1max;
[committed] OpenMP scan for combined for simd
Hi! The following patch handles the last yet unsupported scan case, composite #pragma omp {,parallel }for simd ... reduction(inscan, ...) ... where we want to both parallelize and vectorize; in the first worksharing loop use normal scan support we have for #pragma omp simd ... reduction(inscan, ...) ... and just store those into the per-thread array, then do the normal worksharing loop up/down-sweep and then in the second worksharing loop use a simple #pragma omp simd that just combines the two partial sums and does the user's scan phase. Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. 2019-07-06 Jakub Jelinek * omp-low.c (struct omp_context): Add for_simd_scan_phase member. (maybe_lookup_ctx): Add forward declaration. (omp_find_scan): Likewise. Walk into body of simd if composited with worksharing loop. (scan_omp_simd_scan): New function. (scan_omp_1_stmt): Call it. (lower_rec_simd_input_clauses): Don't create rvar nor rvar2 if ctx->for_simd_scan_phase. (lower_rec_input_clauses): Do much less work for inscan reductions in ctx->for_simd_scan_phase is_simd regions. (lower_omp_scan): Set is_simd also on simd constructs composited with worksharing loop, unless ctx->for_simd_scan_phase. Never emit a sorry message. Don't change GIMPLE_OMP_SCAN stmts into nops and emit their body after in simd constructs composited with worksharing loop. (lower_omp_for_scan): Handle worksharing loop composited with simd. * c-c++-common/gomp/scan-4.c: Don't expect sorry message. * testsuite/libgomp.c/scan-11.c: New test. * testsuite/libgomp.c/scan-12.c: New test. * testsuite/libgomp.c/scan-13.c: New test. * testsuite/libgomp.c/scan-14.c: New test. * testsuite/libgomp.c/scan-15.c: New test. * testsuite/libgomp.c/scan-16.c: New test. * testsuite/libgomp.c/scan-17.c: New test. * testsuite/libgomp.c/scan-18.c: New test. * testsuite/libgomp.c++/scan-9.C: New test. * testsuite/libgomp.c++/scan-10.C: New test. * testsuite/libgomp.c++/scan-11.C: New test. * testsuite/libgomp.c++/scan-12.C: New test. * testsuite/libgomp.c++/scan-13.C: New test. * testsuite/libgomp.c++/scan-14.C: New test. * testsuite/libgomp.c++/scan-15.C: New test. * testsuite/libgomp.c++/scan-16.C: New test. --- gcc/omp-low.c.jj2019-07-05 14:35:03.601143688 +0200 +++ gcc/omp-low.c 2019-07-05 21:23:45.813525150 +0200 @@ -147,6 +147,9 @@ struct omp_context /* True if there is nested scan context with exclusive clause. */ bool scan_exclusive; + + /* True in the second simd loop of for simd with inscan reductions. */ + bool for_simd_scan_phase; }; static splay_tree all_contexts; @@ -2421,6 +2424,85 @@ scan_omp_simd (gimple_stmt_iterator *gsi scan_omp_for (stmt, outer_ctx)->simt_stmt = new_stmt; } +static tree omp_find_scan (gimple_stmt_iterator *, bool *, + struct walk_stmt_info *); +static omp_context *maybe_lookup_ctx (gimple *); + +/* Duplicate #pragma omp simd, one for the scan input phase loop and one + for scan phase loop. */ + +static void +scan_omp_simd_scan (gimple_stmt_iterator *gsi, gomp_for *stmt, + omp_context *outer_ctx) +{ + /* The only change between inclusive and exclusive scan will be + within the first simd loop, so just use inclusive in the + worksharing loop. */ + outer_ctx->scan_inclusive = true; + tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_INCLUSIVE); + OMP_CLAUSE_DECL (c) = integer_zero_node; + + gomp_scan *input_stmt = gimple_build_omp_scan (NULL, NULL_TREE); + gomp_scan *scan_stmt = gimple_build_omp_scan (NULL, c); + gsi_replace (gsi, input_stmt, false); + gimple_seq input_body = NULL; + gimple_seq_add_stmt (_body, stmt); + gsi_insert_after (gsi, scan_stmt, GSI_NEW_STMT); + + gimple_stmt_iterator input1_gsi = gsi_none (); + struct walk_stmt_info wi; + memset (, 0, sizeof (wi)); + wi.val_only = true; + wi.info = (void *) _gsi; + walk_gimple_seq_mod (gimple_omp_body_ptr (stmt), omp_find_scan, NULL, ); + gcc_assert (!gsi_end_p (input1_gsi)); + + gimple *input_stmt1 = gsi_stmt (input1_gsi); + gsi_next (_gsi); + gimple *scan_stmt1 = gsi_stmt (input1_gsi); + gcc_assert (scan_stmt1 && gimple_code (scan_stmt1) == GIMPLE_OMP_SCAN); + c = gimple_omp_scan_clauses (as_a (scan_stmt1)); + if (c && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_EXCLUSIVE) +std::swap (input_stmt1, scan_stmt1); + + gimple_seq input_body1 = gimple_omp_body (input_stmt1); + gimple_omp_set_body (input_stmt1, NULL); + + gimple_seq scan_body = copy_gimple_seq_and_replace_locals (stmt); + gomp_for *new_stmt = as_a (scan_body); + + gimple_omp_set_body (input_stmt1, input_body1); + gimple_omp_set_body (scan_stmt1, NULL); + + gimple_stmt_iterator input2_gsi = gsi_none
[committed] Fix vect-simd-14.c for arm (PR tree-optimization/91096)
Hi! Apparently on arm we force -ffast-math in gcc.dg/vect/ but that changes behavior, as we don't want to insert with -ffinite-math-only infinities when the user didn't use them, for exclusive scan we get -__FLT_MAX__ in the first element rather than -inf. Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. I wonder whether we for -ffast-math shouldn't fold say MAX_EXPR (-__FLT_MAX__, x) into x etc. (in particular if we don't HONOR_INFINITIES as well as don't care about NaNs). 2019-07-06 Jakub Jelinek PR tree-optimization/91096 * gcc.dg/vect/vect-simd-10.c (FLT_MIN_VALUE): Define. (bar, main): Use it instead of -__builtin_inff (). * gcc.dg/vect/vect-simd-14.c (FLT_MIN_VALUE): Define. (bar, main): Use it instead of -__builtin_inff (). --- gcc/testsuite/gcc.dg/vect/vect-simd-10.c.jj 2019-06-19 11:58:53.164238382 +0200 +++ gcc/testsuite/gcc.dg/vect/vect-simd-10.c2019-07-05 17:30:29.617292809 +0200 @@ -7,6 +7,12 @@ #include "tree-vect.h" #endif +#ifdef __FAST_MATH__ +#define FLT_MIN_VALUE (-__FLT_MAX__) +#else +#define FLT_MIN_VALUE (-__builtin_inff ()) +#endif + float r = 1.0f, a[1024], b[1024]; __attribute__((noipa)) void @@ -24,7 +30,7 @@ foo (float *a, float *b) __attribute__((noipa)) float bar (void) { - float s = -__builtin_inff (); + float s = FLT_MIN_VALUE; #pragma omp simd reduction (inscan, max:s) for (int i = 0; i < 1024; i++) { @@ -84,7 +90,7 @@ main () } if (bar () != 592.0f) abort (); - s = -__builtin_inff (); + s = FLT_MIN_VALUE; for (int i = 0; i < 1024; ++i) { if (s < a[i]) --- gcc/testsuite/gcc.dg/vect/vect-simd-14.c.jj 2019-06-21 08:47:04.175673252 +0200 +++ gcc/testsuite/gcc.dg/vect/vect-simd-14.c2019-07-05 17:30:47.455011270 +0200 @@ -7,6 +7,12 @@ #include "tree-vect.h" #endif +#ifdef __FAST_MATH__ +#define FLT_MIN_VALUE (-__FLT_MAX__) +#else +#define FLT_MIN_VALUE (-__builtin_inff ()) +#endif + float r = 1.0f, a[1024], b[1024]; __attribute__((noipa)) void @@ -24,7 +30,7 @@ foo (float *a, float *b) __attribute__((noipa)) float bar (void) { - float s = -__builtin_inff (); + float s = FLT_MIN_VALUE; #pragma omp simd reduction (inscan, max:s) for (int i = 0; i < 1024; i++) { @@ -82,7 +88,7 @@ main () } if (bar () != 592.0f) abort (); - s = -__builtin_inff (); + s = FLT_MIN_VALUE; for (int i = 0; i < 1024; ++i) { if (b[i] != s) Jakub
[committed] Fix OpenMP final scan merging with hypothetical non-commutative combiner
Hi! I've noticed I got the order of arguments in the last UDR combine operation of worksharing scan wrong, var2 contains either the neutral element (first thread) or the prefix sum from the original value up to the last iteration before what the current thread handles and rprivb array contains the partial prefix sum just for the iterations the current thread handled, so if hypothetically user uses some non-commutative UDR which can still be used for prefix sum and get some reasonable result, we get this combination wrong. Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. 2019-07-06 Jakub Jelinek * omp-low.c (omp_find_scan): Make static. (lower_omp_for_scan): Fix order of merge arguments in input phase of the second loop, var2 represents the first partial sum and so needs to go before rprivb[ivar]. --- gcc/omp-low.c.jj2019-07-04 23:38:56.100210759 +0200 +++ gcc/omp-low.c 2019-07-05 13:33:13.795296606 +0200 @@ -9104,7 +9104,7 @@ lower_omp_for_lastprivate (struct omp_fo /* Callback for walk_gimple_seq. Find #pragma omp scan statement. */ -tree +static tree omp_find_scan (gimple_stmt_iterator *gsi_p, bool *handled_ops_p, struct walk_stmt_info *wi) { @@ -9240,8 +9240,8 @@ omp_find_scan (gimple_stmt_iterator *gsi for (i = 0; i < n; i = i + 1) { { -// For UDRs, this is UDR merge (rprivb[ivar], var2); r = rprivb[ivar]; -r = rprivb[ivar] + var2; +// For UDRs, this is r = var2; UDR merge (r, rprivb[ivar]); +r = var2 + rprivb[ivar]; } { // This is the scan phase from user code. @@ -9394,8 +9394,6 @@ lower_omp_for_scan (gimple_seq *body_p, { tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c); tree val = var2; - if (new_vard != new_var) - val = build_fold_addr_expr_loc (clause_loc, val); x = lang_hooks.decls.omp_clause_default_ctor (c, var2, build_outer_var_ref (var, ctx)); @@ -9420,6 +9418,9 @@ lower_omp_for_scan (gimple_seq *body_p, /* Otherwise, assign to it the identity element. */ gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c); tseq = copy_gimple_seq_and_replace_locals (tseq); + + if (new_vard != new_var) + val = build_fold_addr_expr_loc (clause_loc, val); SET_DECL_VALUE_EXPR (new_vard, val); DECL_HAS_VALUE_EXPR_P (new_vard) = 1; SET_DECL_VALUE_EXPR (placeholder, error_mark_node); @@ -9469,11 +9470,19 @@ lower_omp_for_scan (gimple_seq *body_p, x = lang_hooks.decls.omp_clause_assign_op (c, x, var2); gimplify_and_add (x, ); + x = unshare_expr (new_var); + x = lang_hooks.decls.omp_clause_assign_op (c, x, var2); + gimplify_and_add (x, _list); + + val = rprivb_ref; + if (new_vard != new_var) + val = build_fold_addr_expr_loc (clause_loc, val); + tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c); tseq = copy_gimple_seq_and_replace_locals (tseq); SET_DECL_VALUE_EXPR (new_vard, val); DECL_HAS_VALUE_EXPR_P (new_vard) = 1; - SET_DECL_VALUE_EXPR (placeholder, rprivb_ref); + DECL_HAS_VALUE_EXPR_P (placeholder) = 0; lower_omp (, ctx); if (y) SET_DECL_VALUE_EXPR (new_vard, y); @@ -9482,12 +9491,11 @@ lower_omp_for_scan (gimple_seq *body_p, DECL_HAS_VALUE_EXPR_P (new_vard) = 0; SET_DECL_VALUE_EXPR (new_vard, NULL_TREE); } + SET_DECL_VALUE_EXPR (placeholder, new_var); + DECL_HAS_VALUE_EXPR_P (placeholder) = 1; + lower_omp (, ctx); gimple_seq_add_seq (_list, tseq); - x = unshare_expr (new_var); - x = lang_hooks.decls.omp_clause_assign_op (c, x, rprivb_ref); - gimplify_and_add (x, _list); - x = build_outer_var_ref (var, ctx); x = lang_hooks.decls.omp_clause_assign_op (c, x, rpriva_ref); gimplify_and_add (x, _list); @@ -9545,7 +9553,7 @@ lower_omp_for_scan (gimple_seq *body_p, gimplify_assign (unshare_expr (rpriva_ref), var2, ); - x = build2 (code, TREE_TYPE (new_var), rprivb_ref, var2); + x = build2 (code, TREE_TYPE (new_var), var2, rprivb_ref); gimplify_assign (new_var, x, _list); gimplify_assign (build_outer_var_ref (var, ctx), rpriva_ref, Jakub
Re: [PATCH 1/3] C++20 constexpr lib part 1/3
On Sat, 6 Jul 2019 at 06:12, Ed Smith-Rowland via libstdc++ wrote: > By my reckoning, you have a constexpr source array, an output array that > is initialized as it must be for constexpr.?? You have to have a > deterministic result after the copy.?? In the local array version the > actual iterator is not leaking out - just the results of a calculation > that must return one result. > > The only thing that 'helps' is removing the constexpr from the iterator > return and of course that's the whole point of the thing. > > Blargh! But that's fine; the result of copy is not stored in a constexpr variable, but the function return is static_asserted so we have sufficiently tested that std::copy is indeed constexpr-compatible since it appears in a function that is evaluated at compile-time.