Arrays in fortran have a couple of internal variables associated with them, e.g. stride, lbound, ubound, size, etc. Depending on how and where the array was declared, these internal variables may be packed inside an array descriptor represented by a struct or defined individually. The major problem with this is that kernels and parallel regions with default(none) will generate errors if those internal variables are defined individually since the user has no way to add clauses to them. I suspect this is also true for arrays inside omp target regions.
My fix for this involves two parts. First, I reinitialize those private array variables which aren't associated with array descriptors at the beginning of the parallel/kernels region they are used in. Second, I added OMP_CLAUSE_PRIVATE for those internal variables. I'll apply this patch to gomp-4_0-branch shortly. Is there any reason why only certain arrays have array descriptors? The arrays with descriptors don't have this problem. It's only the ones without descriptors that leak new internal variables that cause errors with default(none). Cesar
2015-10-13 Cesar Philippidis <ce...@codesourcery.com> gcc/fortran/ * trans-array.c (gfc_trans_array_bounds): Add an INIT_VLA argument to control whether VLAs should be initialized. Don't mark this function as static. (gfc_trans_auto_array_allocation): Update call to gfc_trans_array_bounds. (gfc_trans_g77_array): Likewise. * trans-array.h: Declare gfc_trans_array_bounds. * trans-openmp.c (gfc_scan_nodesc_arrays): New function. (gfc_privatize_nodesc_arrays_1): New function. (gfc_privatize_nodesc_arrays): New function. (gfc_init_nodesc_arrays): New function. (gfc_trans_oacc_construct): Initialize any internal variables for arrays without array descriptors inside the offloaded parallel and kernels region. (gfc_trans_oacc_combined_directive): Likewise. gcc/testsuite/ * gfortran.dg/goacc/default_none.f95: New test. diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c index a6b761b..86f983a 100644 --- a/gcc/fortran/trans-array.c +++ b/gcc/fortran/trans-array.c @@ -5709,9 +5709,9 @@ gfc_trans_array_cobounds (tree type, stmtblock_t * pblock, /* Generate code to evaluate non-constant array bounds. Sets *poffset and returns the size (in elements) of the array. */ -static tree +tree gfc_trans_array_bounds (tree type, gfc_symbol * sym, tree * poffset, - stmtblock_t * pblock) + stmtblock_t * pblock, bool init_vla) { gfc_array_spec *as; tree size; @@ -5788,7 +5788,9 @@ gfc_trans_array_bounds (tree type, gfc_symbol * sym, tree * poffset, } gfc_trans_array_cobounds (type, pblock, sym); - gfc_trans_vla_type_sizes (sym, pblock); + + if (init_vla) + gfc_trans_vla_type_sizes (sym, pblock); *poffset = offset; return size; @@ -5852,7 +5854,7 @@ gfc_trans_auto_array_allocation (tree decl, gfc_symbol * sym, && !INTEGER_CST_P (sym->ts.u.cl->backend_decl)) gfc_conv_string_length (sym->ts.u.cl, NULL, &init); - size = gfc_trans_array_bounds (type, sym, &offset, &init); + size = gfc_trans_array_bounds (type, sym, &offset, &init, true); /* Don't actually allocate space for Cray Pointees. */ if (sym->attr.cray_pointee) @@ -5947,7 +5949,7 @@ gfc_trans_g77_array (gfc_symbol * sym, gfc_wrapped_block * block) gfc_conv_string_length (sym->ts.u.cl, NULL, &init); /* Evaluate the bounds of the array. */ - gfc_trans_array_bounds (type, sym, &offset, &init); + gfc_trans_array_bounds (type, sym, &offset, &init, true); /* Set the offset. */ if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL) diff --git a/gcc/fortran/trans-array.h b/gcc/fortran/trans-array.h index 52f1c9a..8dbafb9 100644 --- a/gcc/fortran/trans-array.h +++ b/gcc/fortran/trans-array.h @@ -44,6 +44,8 @@ void gfc_trans_g77_array (gfc_symbol *, gfc_wrapped_block *); /* Generate code to deallocate an array, if it is allocated. */ tree gfc_trans_dealloc_allocated (tree, bool, gfc_expr *); +tree gfc_trans_array_bounds (tree, gfc_symbol *, tree *, stmtblock_t *, bool); + tree gfc_full_array_size (stmtblock_t *, tree, int); tree gfc_duplicate_allocatable (tree, tree, tree, int, tree); diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index 8c1e897..f2e9803 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -39,6 +39,8 @@ along with GCC; see the file COPYING3. If not see #include "arith.h" #include "omp-low.h" #include "gomp-constants.h" +#include "hash-set.h" +#include "tree-iterator.h" int ompws_flags; @@ -2716,22 +2718,157 @@ gfc_trans_omp_code (gfc_code *code, bool force_empty) return stmt; } +void gfc_debug_expr (gfc_expr *); + +/* Add any array that does not have an array descriptor to the hash_set + pointed to by DATA. */ + +static int +gfc_scan_nodesc_arrays (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED, + void *data) +{ + hash_set<gfc_symbol *> *arrays = (hash_set<gfc_symbol *> *)data; + + if ((*e)->expr_type == EXPR_VARIABLE) + { + gfc_symbol *sym = (*e)->symtree->n.sym; + + if (sym->attr.dimension && gfc_is_nodesc_array (sym)) + arrays->add (sym); + } + + return 0; +} + +/* Build a set of internal array variables (lbound, ubound, stride, etc.) + that need privatization. */ + +static tree +gfc_privatize_nodesc_arrays_1 (tree *tp, int *walk_subtrees, void *data) +{ + hash_set<tree> *decls = (hash_set<tree> *)data; + + if (TREE_CODE (*tp) == MODIFY_EXPR) + { + tree lhs = TREE_OPERAND (*tp, 0); + if (DECL_P (lhs)) + decls->add (lhs); + } + + if (IS_TYPE_OR_DECL_P (*tp)) + *walk_subtrees = false; + + return NULL; +} + +/* Reinitialize all of the arrays inside ARRAY_SET in BLOCK. Append private + clauses for those arrays in CLAUSES. */ + +static tree +gfc_privatize_nodesc_arrays (hash_set<gfc_symbol *> *array_set, + stmtblock_t *block, tree clauses) +{ + hash_set<gfc_symbol *>::iterator its = array_set->begin (); + hash_set<tree> *private_decls = new hash_set<tree>; + + for (; its != array_set->end (); ++its) + { + gfc_symbol *sym = *its; + tree parm = sym->backend_decl; + tree type = TREE_TYPE (parm); + tree offset, tmp; + + /* Evaluate the bounds of the array. */ + gfc_trans_array_bounds (type, sym, &offset, block, false); + + /* Set the offset. */ + if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL) + gfc_add_modify (block, GFC_TYPE_ARRAY_OFFSET (type), offset); + + /* Set the pointer itself if we aren't using the parameter + directly. */ + if (TREE_CODE (parm) != PARM_DECL && DECL_LANG_SPECIFIC (parm) + && GFC_DECL_SAVED_DESCRIPTOR (parm)) + { + tmp = convert (TREE_TYPE (parm), + GFC_DECL_SAVED_DESCRIPTOR (parm)); + gfc_add_modify (block, parm, tmp); + } + } + + /* Add private clauses for any variables that are used by + gfc_trans_array_bounds. */ + walk_tree_without_duplicates (&block->head, gfc_privatize_nodesc_arrays_1, + private_decls); + + hash_set<tree>::iterator itt = private_decls->begin (); + + for (; itt != private_decls->end (); ++itt) + { + tree nc = build_omp_clause (input_location, OMP_CLAUSE_PRIVATE); + OMP_CLAUSE_DECL (nc) = *itt; + OMP_CLAUSE_CHAIN (nc) = clauses; + clauses = nc; + } + + delete private_decls; + + return clauses; +} + +/* Reinitialize any arrays in CLAUSES used inside CODE which do not contain + array descriptors if SCAN_NODESC_ARRAYS is TRUE. Place the initialization + sequences in CODE. Update CLAUSES to contain OMP_CLAUSE_PRIVATE for any + arrays which were initialized. */ + +static hash_set<gfc_symbol *> * +gfc_init_nodesc_arrays (stmtblock_t *inner, tree *clauses, gfc_code *code, + bool scan_nodesc_arrays) +{ + hash_set<gfc_symbol *> *array_set = NULL; + + if (!scan_nodesc_arrays) + return NULL; + + array_set = new hash_set<gfc_symbol *>; + gfc_code_walker (&code, gfc_dummy_code_callback, gfc_scan_nodesc_arrays, + array_set); + + if (array_set->elements ()) + { + gfc_start_block (inner); + pushlevel (); + *clauses = gfc_privatize_nodesc_arrays (array_set, inner, *clauses); + } + else + { + delete array_set; + array_set = NULL; + } + + return array_set; +} + /* Trans OpenACC directives. */ /* parallel, kernels, data and host_data. */ static tree gfc_trans_oacc_construct (gfc_code *code) { - stmtblock_t block; + stmtblock_t block, inner; tree stmt, oacc_clauses; enum tree_code construct_code; + bool scan_nodesc_arrays = false; + hash_set<gfc_symbol *> *array_set = NULL; switch (code->op) { case EXEC_OACC_PARALLEL: construct_code = OACC_PARALLEL; + scan_nodesc_arrays = true; break; case EXEC_OACC_KERNELS: construct_code = OACC_KERNELS; + scan_nodesc_arrays = true; break; case EXEC_OACC_DATA: construct_code = OACC_DATA; @@ -2746,10 +2883,25 @@ gfc_trans_oacc_construct (gfc_code *code) gfc_start_block (&block); oacc_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses, code->loc); + + array_set = gfc_init_nodesc_arrays (&inner, &oacc_clauses, code, + scan_nodesc_arrays); + stmt = gfc_trans_omp_code (code->block->next, true); + + if (array_set && array_set->elements ()) + { + gfc_add_expr_to_block (&inner, stmt); + stmt = gfc_finish_block (&inner); + stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0)); + delete array_set; + } + stmt = build2_loc (input_location, construct_code, void_type_node, stmt, oacc_clauses); + gfc_add_expr_to_block (&block, stmt); + return gfc_finish_block (&block); } @@ -3483,18 +3635,22 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock, static tree gfc_trans_oacc_combined_directive (gfc_code *code) { - stmtblock_t block, *pblock = NULL; + stmtblock_t block, inner, *pblock = NULL; gfc_omp_clauses construct_clauses, loop_clauses; tree stmt, oacc_clauses = NULL_TREE; enum tree_code construct_code; + bool scan_nodesc_arrays = false; + hash_set<gfc_symbol *> *array_set = NULL; switch (code->op) { case EXEC_OACC_PARALLEL_LOOP: construct_code = OACC_PARALLEL; + scan_nodesc_arrays = true; break; case EXEC_OACC_KERNELS_LOOP: construct_code = OACC_KERNELS; + scan_nodesc_arrays = true; break; default: gcc_unreachable (); @@ -3526,18 +3682,35 @@ gfc_trans_oacc_combined_directive (gfc_code *code) oacc_clauses = gfc_trans_omp_clauses (&block, &construct_clauses, code->loc); } + + array_set = gfc_init_nodesc_arrays (&inner, &oacc_clauses, code, + scan_nodesc_arrays); + if (!loop_clauses.seq) - pblock = █ + pblock = (array_set && array_set->elements ()) ? &inner : █ else pushlevel (); stmt = gfc_trans_omp_do (code, EXEC_OACC_LOOP, pblock, &loop_clauses, NULL); + + if (array_set && array_set->elements ()) + gfc_add_expr_to_block (&inner, stmt); + if (TREE_CODE (stmt) != BIND_EXPR) stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0)); else poplevel (0, 0); + + if (array_set && array_set->elements ()) + { + stmt = gfc_finish_block (&inner); + stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0)); + delete array_set; + } + stmt = build2_loc (input_location, construct_code, void_type_node, stmt, oacc_clauses); gfc_add_expr_to_block (&block, stmt); + return gfc_finish_block (&block); } diff --git a/gcc/testsuite/gfortran.dg/goacc/default_none.f95 b/gcc/testsuite/gfortran.dg/goacc/default_none.f95 new file mode 100644 index 0000000..5ce66ae --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/default_none.f95 @@ -0,0 +1,59 @@ +! Ensure that the internal array variables, offset, lbound, etc., don't +! trigger errors with default(none). + +! { dg-do compile } + +program main + implicit none + integer i + integer,parameter :: n = 100 + integer,allocatable :: a1(:), a2(:,:) + + allocate (a1 (n)) + allocate (a2 (-n:n,-n:n)) + a1 (:) = -1 + + !$acc parallel loop default(none) copy (a1(1:n)) + do i = 1,n + a1(i) = i + end do + !$acc end parallel loop + + call foo (a1) + call bar (a1, n) + call foobar (a2,n) + +contains + + subroutine foo (da1) + integer :: da1(n) + + !$acc parallel loop default(none) copy (da1(1:n)) + do i = 1,n + da1(i) = i*2 + end do + !$acc end parallel loop + end subroutine foo +end program main + +subroutine bar (da2,n) + integer :: n, da2(n) + integer i + + !$acc parallel loop default(none) copy (da2(1:n)) firstprivate(n) + do i = 1,n + da2(i) = i*3 + end do + !$acc end parallel loop +end subroutine bar + +subroutine foobar (da3,n) + integer :: n, da3(-n:n,-n:n) + integer i + + !$acc parallel loop default(none) copy (da3(-n:n,-n:n)) firstprivate(n) + do i = 1,n + da3(i,0) = i*3 + end do + !$acc end parallel loop +end subroutine foobar