I've applied this patch to gomp-4_0-branch which fixes an ICE when an array declared inside a module is used inside an offloaded acc region. Bad things happen when you try to use sym->backend_decl when it wasn't defined.
This patch was part of an optimization that I implemented in gomp4 in an attempt to move all of the non-array descriptor array variables into the offloaded region. Applications, such as cloverleaf, sometimes have a lot of small offloaded regions, and treating those supplementary array variables as firstprivate caused a measurable I/O overhead. Cesar
2015-10-13 Cesar Philippidis <ce...@codesourcery.com> gcc/fortran/ * trans-array.c (gfc_trans_array_bounds): Add an INIT_VLA argument to control whether VLAs should be initialized. Don't mark this function as static. (gfc_trans_auto_array_allocation): Update call to gfc_trans_array_bounds. (gfc_trans_g77_array): Likewise. * trans-array.h: Declare gfc_trans_array_bounds. * trans-openmp.c (gfc_scan_nodesc_arrays): New function. (gfc_privatize_nodesc_arrays_1): New function. (gfc_privatize_nodesc_arrays): New function. (gfc_init_nodesc_arrays): New function. (gfc_trans_oacc_construct): Initialize any internal variables for arrays without array descriptors inside the offloaded parallel and kernels region. (gfc_trans_oacc_combined_directive): Likewise. gcc/testsuite/ * gfortran.dg/goacc/default_none.f95: New test. diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c index a6b761b..86f983a 100644 --- a/gcc/fortran/trans-array.c +++ b/gcc/fortran/trans-array.c @@ -5709,9 +5709,9 @@ gfc_trans_array_cobounds (tree type, stmtblock_t * pblock, /* Generate code to evaluate non-constant array bounds. Sets *poffset and returns the size (in elements) of the array. */ -static tree +tree gfc_trans_array_bounds (tree type, gfc_symbol * sym, tree * poffset, - stmtblock_t * pblock) + stmtblock_t * pblock, bool init_vla) { gfc_array_spec *as; tree size; @@ -5788,7 +5788,9 @@ gfc_trans_array_bounds (tree type, gfc_symbol * sym, tree * poffset, } gfc_trans_array_cobounds (type, pblock, sym); - gfc_trans_vla_type_sizes (sym, pblock); + + if (init_vla) + gfc_trans_vla_type_sizes (sym, pblock); *poffset = offset; return size; @@ -5852,7 +5854,7 @@ gfc_trans_auto_array_allocation (tree decl, gfc_symbol * sym, && !INTEGER_CST_P (sym->ts.u.cl->backend_decl)) gfc_conv_string_length (sym->ts.u.cl, NULL, &init); - size = gfc_trans_array_bounds (type, sym, &offset, &init); + size = gfc_trans_array_bounds (type, sym, &offset, &init, true); /* Don't actually allocate space for Cray Pointees. */ if (sym->attr.cray_pointee) @@ -5947,7 +5949,7 @@ gfc_trans_g77_array (gfc_symbol * sym, gfc_wrapped_block * block) gfc_conv_string_length (sym->ts.u.cl, NULL, &init); /* Evaluate the bounds of the array. */ - gfc_trans_array_bounds (type, sym, &offset, &init); + gfc_trans_array_bounds (type, sym, &offset, &init, true); /* Set the offset. */ if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL) diff --git a/gcc/fortran/trans-array.h b/gcc/fortran/trans-array.h index 52f1c9a..8dbafb9 100644 --- a/gcc/fortran/trans-array.h +++ b/gcc/fortran/trans-array.h @@ -44,6 +44,8 @@ void gfc_trans_g77_array (gfc_symbol *, gfc_wrapped_block *); /* Generate code to deallocate an array, if it is allocated. */ tree gfc_trans_dealloc_allocated (tree, bool, gfc_expr *); +tree gfc_trans_array_bounds (tree, gfc_symbol *, tree *, stmtblock_t *, bool); + tree gfc_full_array_size (stmtblock_t *, tree, int); tree gfc_duplicate_allocatable (tree, tree, tree, int, tree); diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index 8c1e897..f2e9803 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -39,6 +39,8 @@ along with GCC; see the file COPYING3. If not see #include "arith.h" #include "omp-low.h" #include "gomp-constants.h" +#include "hash-set.h" +#include "tree-iterator.h" int ompws_flags; @@ -2716,22 +2718,157 @@ gfc_trans_omp_code (gfc_code *code, bool force_empty) return stmt; } +void gfc_debug_expr (gfc_expr *); + +/* Add any array that does not have an array descriptor to the hash_set + pointed to by DATA. */ + +static int +gfc_scan_nodesc_arrays (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED, + void *data) +{ + hash_set<gfc_symbol *> *arrays = (hash_set<gfc_symbol *> *)data; + + if ((*e)->expr_type == EXPR_VARIABLE) + { + gfc_symbol *sym = (*e)->symtree->n.sym; + + if (sym->attr.dimension && gfc_is_nodesc_array (sym)) + arrays->add (sym); + } + + return 0; +} + +/* Build a set of internal array variables (lbound, ubound, stride, etc.) + that need privatization. */ + +static tree +gfc_privatize_nodesc_arrays_1 (tree *tp, int *walk_subtrees, void *data) +{ + hash_set<tree> *decls = (hash_set<tree> *)data; + + if (TREE_CODE (*tp) == MODIFY_EXPR) + { + tree lhs = TREE_OPERAND (*tp, 0); + if (DECL_P (lhs)) + decls->add (lhs); + } + + if (IS_TYPE_OR_DECL_P (*tp)) + *walk_subtrees = false; + + return NULL; +} + +/* Reinitialize all of the arrays inside ARRAY_SET in BLOCK. Append private + clauses for those arrays in CLAUSES. */ + +static tree +gfc_privatize_nodesc_arrays (hash_set<gfc_symbol *> *array_set, + stmtblock_t *block, tree clauses) +{ + hash_set<gfc_symbol *>::iterator its = array_set->begin (); + hash_set<tree> *private_decls = new hash_set<tree>; + + for (; its != array_set->end (); ++its) + { + gfc_symbol *sym = *its; + tree parm = sym->backend_decl; + tree type = TREE_TYPE (parm); + tree offset, tmp; + + /* Evaluate the bounds of the array. */ + gfc_trans_array_bounds (type, sym, &offset, block, false); + + /* Set the offset. */ + if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL) + gfc_add_modify (block, GFC_TYPE_ARRAY_OFFSET (type), offset); + + /* Set the pointer itself if we aren't using the parameter + directly. */ + if (TREE_CODE (parm) != PARM_DECL && DECL_LANG_SPECIFIC (parm) + && GFC_DECL_SAVED_DESCRIPTOR (parm)) + { + tmp = convert (TREE_TYPE (parm), + GFC_DECL_SAVED_DESCRIPTOR (parm)); + gfc_add_modify (block, parm, tmp); + } + } + + /* Add private clauses for any variables that are used by + gfc_trans_array_bounds. */ + walk_tree_without_duplicates (&block->head, gfc_privatize_nodesc_arrays_1, + private_decls); + + hash_set<tree>::iterator itt = private_decls->begin (); + + for (; itt != private_decls->end (); ++itt) + { + tree nc = build_omp_clause (input_location, OMP_CLAUSE_PRIVATE); + OMP_CLAUSE_DECL (nc) = *itt; + OMP_CLAUSE_CHAIN (nc) = clauses; + clauses = nc; + } + + delete private_decls; + + return clauses; +} + +/* Reinitialize any arrays in CLAUSES used inside CODE which do not contain + array descriptors if SCAN_NODESC_ARRAYS is TRUE. Place the initialization + sequences in CODE. Update CLAUSES to contain OMP_CLAUSE_PRIVATE for any + arrays which were initialized. */ + +static hash_set<gfc_symbol *> * +gfc_init_nodesc_arrays (stmtblock_t *inner, tree *clauses, gfc_code *code, + bool scan_nodesc_arrays) +{ + hash_set<gfc_symbol *> *array_set = NULL; + + if (!scan_nodesc_arrays) + return NULL; + + array_set = new hash_set<gfc_symbol *>; + gfc_code_walker (&code, gfc_dummy_code_callback, gfc_scan_nodesc_arrays, + array_set); + + if (array_set->elements ()) + { + gfc_start_block (inner); + pushlevel (); + *clauses = gfc_privatize_nodesc_arrays (array_set, inner, *clauses); + } + else + { + delete array_set; + array_set = NULL; + } + + return array_set; +} + /* Trans OpenACC directives. */ /* parallel, kernels, data and host_data. */ static tree gfc_trans_oacc_construct (gfc_code *code) { - stmtblock_t block; + stmtblock_t block, inner; tree stmt, oacc_clauses; enum tree_code construct_code; + bool scan_nodesc_arrays = false; + hash_set<gfc_symbol *> *array_set = NULL; switch (code->op) { case EXEC_OACC_PARALLEL: construct_code = OACC_PARALLEL; + scan_nodesc_arrays = true; break; case EXEC_OACC_KERNELS: construct_code = OACC_KERNELS; + scan_nodesc_arrays = true; break; case EXEC_OACC_DATA: construct_code = OACC_DATA; @@ -2746,10 +2883,25 @@ gfc_trans_oacc_construct (gfc_code *code) gfc_start_block (&block); oacc_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses, code->loc); + + array_set = gfc_init_nodesc_arrays (&inner, &oacc_clauses, code, + scan_nodesc_arrays); + stmt = gfc_trans_omp_code (code->block->next, true); + + if (array_set && array_set->elements ()) + { + gfc_add_expr_to_block (&inner, stmt); + stmt = gfc_finish_block (&inner); + stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0)); + delete array_set; + } + stmt = build2_loc (input_location, construct_code, void_type_node, stmt, oacc_clauses); + gfc_add_expr_to_block (&block, stmt); + return gfc_finish_block (&block); } @@ -3483,18 +3635,22 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock, static tree gfc_trans_oacc_combined_directive (gfc_code *code) { - stmtblock_t block, *pblock = NULL; + stmtblock_t block, inner, *pblock = NULL; gfc_omp_clauses construct_clauses, loop_clauses; tree stmt, oacc_clauses = NULL_TREE; enum tree_code construct_code; + bool scan_nodesc_arrays = false; + hash_set<gfc_symbol *> *array_set = NULL; switch (code->op) { case EXEC_OACC_PARALLEL_LOOP: construct_code = OACC_PARALLEL; + scan_nodesc_arrays = true; break; case EXEC_OACC_KERNELS_LOOP: construct_code = OACC_KERNELS; + scan_nodesc_arrays = true; break; default: gcc_unreachable (); @@ -3526,18 +3682,35 @@ gfc_trans_oacc_combined_directive (gfc_code *code) oacc_clauses = gfc_trans_omp_clauses (&block, &construct_clauses, code->loc); } + + array_set = gfc_init_nodesc_arrays (&inner, &oacc_clauses, code, + scan_nodesc_arrays); + if (!loop_clauses.seq) - pblock = █ + pblock = (array_set && array_set->elements ()) ? &inner : █ else pushlevel (); stmt = gfc_trans_omp_do (code, EXEC_OACC_LOOP, pblock, &loop_clauses, NULL); + + if (array_set && array_set->elements ()) + gfc_add_expr_to_block (&inner, stmt); + if (TREE_CODE (stmt) != BIND_EXPR) stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0)); else poplevel (0, 0); + + if (array_set && array_set->elements ()) + { + stmt = gfc_finish_block (&inner); + stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0)); + delete array_set; + } + stmt = build2_loc (input_location, construct_code, void_type_node, stmt, oacc_clauses); gfc_add_expr_to_block (&block, stmt); + return gfc_finish_block (&block); } diff --git a/gcc/testsuite/gfortran.dg/goacc/default_none.f95 b/gcc/testsuite/gfortran.dg/goacc/default_none.f95 new file mode 100644 index 0000000..5ce66ae --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/default_none.f95 @@ -0,0 +1,59 @@ +! Ensure that the internal array variables, offset, lbound, etc., don't +! trigger errors with default(none). + +! { dg-do compile } + +program main + implicit none + integer i + integer,parameter :: n = 100 + integer,allocatable :: a1(:), a2(:,:) + + allocate (a1 (n)) + allocate (a2 (-n:n,-n:n)) + a1 (:) = -1 + + !$acc parallel loop default(none) copy (a1(1:n)) + do i = 1,n + a1(i) = i + end do + !$acc end parallel loop + + call foo (a1) + call bar (a1, n) + call foobar (a2,n) + +contains + + subroutine foo (da1) + integer :: da1(n) + + !$acc parallel loop default(none) copy (da1(1:n)) + do i = 1,n + da1(i) = i*2 + end do + !$acc end parallel loop + end subroutine foo +end program main + +subroutine bar (da2,n) + integer :: n, da2(n) + integer i + + !$acc parallel loop default(none) copy (da2(1:n)) firstprivate(n) + do i = 1,n + da2(i) = i*3 + end do + !$acc end parallel loop +end subroutine bar + +subroutine foobar (da3,n) + integer :: n, da3(-n:n,-n:n) + integer i + + !$acc parallel loop default(none) copy (da3(-n:n,-n:n)) firstprivate(n) + do i = 1,n + da3(i,0) = i*3 + end do + !$acc end parallel loop +end subroutine foobar