Well, here's a variation which actually passes regression-test.
Seems I implicitly believed that the implicit save on main program variables actually works... well, it turns out that it doesn't, which is now PR85364. OK for trunk? Thomas 2018-04-12 Thomas Koenig <tkoe...@gcc.gnu.org> PR fortran/83064 PR testsuite/85346 * trans-stmt.c (gfc_trans_forall_loop): Use annot_expr_ivdep_kind for annotation and remove dependence on -ftree-parallelize-loops. 2018-04-12 Thomas Koenig <tkoe...@gcc.gnu.org> PR fortran/83064 PR testsuite/85346 * gfortran.dg/do_concurrent_5.f90: Dynamically allocate main work array and move test to libgomp/testsuite/libgomp.fortran. * gfortran.dg/do_concurrent_6.f90: New test. 2018-04-12 Thomas Koenig <tkoe...@gcc.gnu.org> PR fortran/83064 PR testsuite/85346 * testsuite/libgomp.fortran: Move modified test from gfortran.dg to here.
Index: trans-stmt.c =================================================================== --- trans-stmt.c (Revision 259326) +++ trans-stmt.c (Arbeitskopie) @@ -3643,12 +3643,12 @@ gfc_trans_forall_loop (forall_info *forall_tmp, tr cond = fold_build2_loc (input_location, LE_EXPR, logical_type_node, count, build_int_cst (TREE_TYPE (count), 0)); - /* PR 83064 means that we cannot use the annotation if the - autoparallelizer is active. */ - if (forall_tmp->do_concurrent && ! flag_tree_parallelize_loops) + /* PR 83064 means that we cannot use annot_expr_parallel_kind until + the autoparallelizer can hande this. */ + if (forall_tmp->do_concurrent) cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, build_int_cst (integer_type_node, - annot_expr_parallel_kind), + annot_expr_ivdep_kind), integer_zero_node); tmp = build1_v (GOTO_EXPR, exit_label);
! { dg-do run } ! PR 83064 - this used to give wrong results. ! { dg-additional-options "-O1 -ftree-parallelize-loops=2" } ! Original test case by Christian Felter program main use, intrinsic :: iso_fortran_env implicit none integer, parameter :: nsplit = 4 integer(int64), parameter :: ne = 2**20 integer(int64) :: stride, low(nsplit), high(nsplit), i integer(int64), dimension(:), allocatable :: edof real(real64), dimension(nsplit) :: pi allocate (edof(ne)) edof(1::4) = 1 edof(2::4) = 2 edof(3::4) = 3 edof(4::4) = 4 stride = ceiling(real(ne)/nsplit) do i = 1, nsplit high(i) = stride*i end do do i = 2, nsplit low(i) = high(i-1) + 1 end do low(1) = 1 high(nsplit) = ne pi = 0 do concurrent (i = 1:nsplit) pi(i) = sum(compute( low(i), high(i) )) end do if (abs (sum(pi) - atan(1.0d0)) > 1e-5) STOP 1 contains pure function compute( low, high ) result( ttt ) integer(int64), intent(in) :: low, high real(real64), dimension(nsplit) :: ttt integer(int64) :: j, k ttt = 0 ! Unrolled loop ! do j = low, high, 4 ! k = 1 ! ttt(k) = ttt(k) + (-1)**(j+1) / real( 2*j-1 ) ! k = 2 ! ttt(k) = ttt(k) + (-1)**(j+2) / real( 2*j+1 ) ! k = 3 ! ttt(k) = ttt(k) + (-1)**(j+3) / real( 2*j+3 ) ! k = 4 ! ttt(k) = ttt(k) + (-1)**(j+4) / real( 2*j+5 ) ! end do ! Loop with modulo operation ! do j = low, high ! k = mod( j, nsplit ) + 1 ! ttt(k) = ttt(k) + (-1)**(j+1) / real( 2*j-1 ) ! end do ! Loop with subscripting via host association do j = low, high k = edof(j) ttt(k) = ttt(k) + (-1.0_real64)**(j+1) / real( 2*j-1 ) end do end function end program main
! { dg-do compile } ! { dg-additional-options "-fdump-tree-original" } program main real, dimension(100) :: a,b call random_number(a) do concurrent (i=1:100) b(i) = a(i)*a(i) end do print *,sum(a) end program main ! { dg-final { scan-tree-dump-times "ivdep" 1 "original" } }