https://gcc.gnu.org/g:cc72c031b0be60f5c24f52ad0dd2c932f3737766
commit cc72c031b0be60f5c24f52ad0dd2c932f3737766 Author: Tobias Burnus <tob...@codesourcery.com> Date: Thu Mar 23 18:04:17 2023 +0100 Fortran/OpenMP: Fix 'alloc' and 'from' mapping for allocatable components Even with 'alloc' and map-entering 'from' mapping, the following should hold. For explicit mapping, that's already the case, this handles the automatical deep mapping of allocatable components. Namely: * On the device, the array bounds (of allocated allocatables) must match the host, implying 'to' (or 'tofrom') mapping. * On map exiting, the copying out shall not destroy the unallocated allocation status (nor the pointer address of allocated allocatables). The latter was not a problem for allocated allocatables as for those a pointer was GOMP_MAP_ATTACHed; however, for unallocated allocatables, before it copied back device-allocated memory which might not be nullified. While 'alloc' was not deep-mapped at all, for map-entering 'from', the array bounds were not set, making allocated derived-type components inaccessible on the device (and wrong on the host on copy back). The solution is, first, to deep-map 'alloc' as well and to copy to the device even with 'alloc' and (map-entering) 'from'. This copying is only done if there is a scalar (for the unallocated case) or array allocatable directly in the derived type and then it is shallowly copied; the data pointed to is then again only alloc'ed, unless it contains in turn allocatables. gcc/fortran/ * trans-openmp.cc (gfc_has_alloc_comps): Add 'bool shallow_alloc_only=false' arg. (gfc_omp_replace_alloc_by_to_mapping): New, call it. (gfc_omp_deep_map_kind_p): Return 'true' also for '(present,)alloc'. (gfc_omp_deep_mapping_item, gfc_omp_deep_mapping_do): On map entering, replace shallowly 'alloc'/'from' by '(from)to' mapping if there are allocatable components. libgomp/ * testsuite/libgomp.fortran/map-alloc-comp-8.f90: New test. Diff: --- gcc/fortran/ChangeLog.omp | 10 + gcc/fortran/trans-openmp.cc | 96 +++++++- libgomp/ChangeLog.omp | 4 + .../testsuite/libgomp.fortran/map-alloc-comp-8.f90 | 268 +++++++++++++++++++++ 4 files changed, 371 insertions(+), 7 deletions(-) diff --git a/gcc/fortran/ChangeLog.omp b/gcc/fortran/ChangeLog.omp index 26d69b8081f..d64851275b1 100644 --- a/gcc/fortran/ChangeLog.omp +++ b/gcc/fortran/ChangeLog.omp @@ -1,3 +1,13 @@ +2023-03-23 Tobias Burnus <tob...@codesourcery.com> + + * trans-openmp.cc (gfc_has_alloc_comps): Add 'bool + shallow_alloc_only=false' arg. + (gfc_omp_replace_alloc_by_to_mapping): New, call it. + (gfc_omp_deep_map_kind_p): Return 'true' also for '(present,)alloc'. + (gfc_omp_deep_mapping_item, gfc_omp_deep_mapping_do): On map entering, + replace shallowly 'alloc'/'from' by '(from)to' mapping if there are + allocatable components. + 2023-03-23 Tobias Burnus <tob...@codesourcery.com> * class.cc (generate_callback_wrapper): Add attr.class_ok check. diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc index 6dab261c381..a4abc689895 100644 --- a/gcc/fortran/trans-openmp.cc +++ b/gcc/fortran/trans-openmp.cc @@ -389,10 +389,13 @@ gfc_omp_report_decl (tree decl) } /* Return true if TYPE has any allocatable components; - if ptr_ok, the decl itself is permitted to have the POINTER attribute. */ + if ptr_ok, the decl itself is permitted to have the POINTER attribute. + if shallow_alloc_only, returns only true if any of the fields is an + allocatable; called with true by gfc_omp_replace_alloc_by_to_mapping. */ static bool -gfc_has_alloc_comps (tree type, tree decl, bool ptr_ok) +gfc_has_alloc_comps (tree type, tree decl, bool ptr_ok, + bool shallow_alloc_only=false) { tree field, ftype; @@ -425,12 +428,50 @@ gfc_has_alloc_comps (tree type, tree decl, bool ptr_ok) if (GFC_DESCRIPTOR_TYPE_P (ftype) && GFC_TYPE_ARRAY_AKIND (ftype) == GFC_ARRAY_ALLOCATABLE) return true; - if (gfc_has_alloc_comps (ftype, field, false)) + if (!shallow_alloc_only + && gfc_has_alloc_comps (ftype, field, false)) return true; } return false; } +/* gfc_omp_replace_alloc_by_to_mapping is used with gfc_omp_deep_mapping... to + handle the following: + + For map(alloc: dt), the array descriptors of allocatable components should + be mapped as 'to'; this could be done by (A) adding 'map(to: dt%alloc_comp)' + for each component (and avoiding to increment the reference count). + Or (B) by just mapping all of 'dt' as 'to'. + + If 'dt' contains several allocatable components and not much other data, + (A) is more efficient. If 'dt' contains a large const-size array, (A) will + copy it to the device instead of only 'alloc'ating it. + + IMPLEMENTATION CHOICE: We do (A). It avoids the ref-count issue and it is + expected that, for real-world code, derived types with allocatable + components only have few other components and either no const-size arrays. + This copying is done irrespectively whether the allocatables are allocated. + + If users wanted to save memory, they have to use 'map(alloc:dt%comp)' as + also with 'map(alloc:dt)' all components get copied. + + For the copy to the device, only allocatable arrays are relevant as their + the bounds are required; the pointer is set separately (GOMP_MAP_ATTACH) + and the only setting required for scalars. However, when later copying out + of the device, an unallocated allocatable must remain unallocated/NULL on + the host; to achieve this we also must have it set to NULL on the device + to avoid issues with uninitialized memory being copied back for the pointer + address. If we could set the pointer to NULL, gfc_has_alloc_comps's + shallow_alloc_only could be restricted to return true only for arrays. + + We only need to return true if there are allocatable-array components. */ + +static bool +gfc_omp_replace_alloc_by_to_mapping (tree type, tree decl, bool ptr_ok) +{ + return gfc_has_alloc_comps (type, decl, ptr_ok, true); +} + /* Return true if TYPE is polymorphic but not with pointer attribute. */ static bool @@ -2732,7 +2773,15 @@ gfc_omp_deep_mapping_item (bool is_cnt, bool do_copy, bool do_alloc_check, tmp = gfc_conv_descriptor_data_get (tmp); } - gfc_omp_deep_mapping_map (tmp, bytesize, tkind, loc, data_array, + /* For polymorphic, a extended type may have allocatable components; + see comment before gfc_omp_replace_alloc_by_to_mapping. */ + unsigned HOST_WIDE_INT tkind2 = tkind; + if (tkind == GOMP_MAP_ALLOC) + tkind2 = GOMP_MAP_TO; + else if (tkind == GOMP_MAP_FROM + && gimple_omp_target_kind (ctx) != GF_OMP_TARGET_KIND_EXIT_DATA) + tkind2 = GOMP_MAP_TOFROM; + gfc_omp_deep_mapping_map (tmp, bytesize, tkind2, loc, data_array, sizes_array, kinds_array, offset_data, offset, seq, ctx); } @@ -2757,7 +2806,16 @@ gfc_omp_deep_mapping_item (bool is_cnt, bool do_copy, bool do_alloc_check, tmp = decl; bytesize = TYPE_SIZE_UNIT (TREE_TYPE (decl)); } - gfc_omp_deep_mapping_map (tmp, bytesize, tkind, loc, data_array, + unsigned HOST_WIDE_INT tkind2 = tkind; + if (!is_cnt + && (tkind == GOMP_MAP_ALLOC + || (tkind == GOMP_MAP_FROM + && (gimple_omp_target_kind (ctx) + != GF_OMP_TARGET_KIND_EXIT_DATA))) + && gfc_omp_replace_alloc_by_to_mapping (TREE_TYPE (decl), decl, true)) + tkind2 = tkind == GOMP_MAP_ALLOC ? GOMP_MAP_TO : GOMP_MAP_TOFROM; + + gfc_omp_deep_mapping_map (tmp, bytesize, tkind2, loc, data_array, sizes_array, kinds_array, offset_data, offset, seq, ctx); } @@ -2885,8 +2943,8 @@ gfc_omp_deep_map_kind_p (tree clause) case GOMP_MAP_ALWAYS_FROM: case GOMP_MAP_ALWAYS_TOFROM: case GOMP_MAP_FIRSTPRIVATE: - return true; case GOMP_MAP_ALLOC: + return true; case GOMP_MAP_POINTER: case GOMP_MAP_TO_PSET: case GOMP_MAP_FORCE_PRESENT: @@ -2999,6 +3057,21 @@ gfc_omp_deep_mapping_do (bool is_cnt, const gimple *ctx, tree clause, tree decl = gfc_omp_deep_mapping_int_p (ctx, clause); if (decl == NULL_TREE) return NULL_TREE; + /* Handle: map(alloc:dt%cmp [len: ptr_size]) map(tofrom: D.0123...), + where GFC_DECL_SAVED_DESCRIPTOR(D.0123) is the same (here: dt%cmp). */ + if (OMP_CLAUSE_MAP_KIND (clause) == GOMP_MAP_ALLOC + || OMP_CLAUSE_MAP_KIND (clause) == GOMP_MAP_PRESENT_ALLOC) + { + tree c = clause; + while ((c = OMP_CLAUSE_CHAIN (c)) != NULL_TREE) + { + if (!gfc_omp_deep_map_kind_p (c)) + continue; + tree d = gfc_omp_deep_mapping_int_p (ctx, c); + if (d != NULL_TREE && operand_equal_p (decl, d, 0)) + return NULL_TREE; + } + } tree type = TREE_TYPE (decl); if (POINTER_TYPE_P (type)) type = TREE_TYPE (type); @@ -3039,6 +3112,15 @@ gfc_omp_deep_mapping_do (bool is_cnt, const gimple *ctx, tree clause, || GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_POINTER_CONT))) do_alloc_check = true; + if (!is_cnt + && (tkind == GOMP_MAP_ALLOC + || (tkind == GOMP_MAP_FROM + && (gimple_omp_target_kind (ctx) + != GF_OMP_TARGET_KIND_EXIT_DATA))) + && (poly || gfc_omp_replace_alloc_by_to_mapping (type, tmp, true))) + OMP_CLAUSE_SET_MAP_KIND (clause, tkind == GOMP_MAP_ALLOC ? GOMP_MAP_TO + : GOMP_MAP_TOFROM); + /* TODO: For map(a(:)), we know it is present & allocated. */ tree present = (DECL_P (decl) ? gfc_omp_check_optional_argument (decl, true) @@ -3066,7 +3148,7 @@ gfc_omp_deep_mapping_do (bool is_cnt, const gimple *ctx, tree clause, gfc_omp_deep_mapping_item (is_cnt, do_copy, do_alloc_check, loc, decl, &token, tkind, data, sizes, kinds, offset_data, offset, num, seq, ctx); - /* Double: Map + pointer assign. */ + /* Multiply by 2 as there are two mappings: data + pointer assign. */ if (is_cnt) gimplify_assign (num, fold_build2_loc (input_location, MULT_EXPR, diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index 191c3e61ad3..b3e29fdd39f 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,3 +1,7 @@ +2023-03-23 Tobias Burnus <tob...@codesourcery.com> + + * testsuite/libgomp.fortran/map-alloc-comp-8.f90: New test. + 2023-03-10 Thomas Schwinge <tho...@codesourcery.com> PR other/76739 diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-8.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-8.f90 new file mode 100644 index 00000000000..9c3c6d49daa --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-8.f90 @@ -0,0 +1,268 @@ +module m + implicit none (type, external) + type t + integer, allocatable :: A(:) + end type t + type t2 + type(t), allocatable :: vT + integer, allocatable :: x + end type t2 + +contains + + subroutine test_alloc() + type(t) :: var + type(t), allocatable :: var2 + + allocate(var2) + allocate(var%A(4), var2%A(5)) + + !$omp target enter data map(alloc: var, var2) + !$omp target + if (.not. allocated(Var2)) stop 1 + if (.not. allocated(Var%A)) stop 2 + if (.not. allocated(Var2%A)) stop 3 + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) stop 4 + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) stop 5 + var%A = [1,2,3,4] + var2%A = [11,22,33,44,55] + !$omp end target + !$omp target exit data map(from: var, var2) + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%A)) error stop + if (.not. allocated(Var2%A)) error stop + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) error stop + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) error stop + if (any(var%A /= [1,2,3,4])) error stop + if (any(var2%A /= [11,22,33,44,55])) error stop + end subroutine test_alloc + + subroutine test2_alloc() + type(t2) :: var + type(t2), allocatable :: var2 + + allocate(var2) + allocate(var%x, var2%x) + + !$omp target enter data map(alloc: var, var2) + !$omp target + if (.not. allocated(Var2)) stop 6 + if (.not. allocated(Var%x)) stop 7 + if (.not. allocated(Var2%x)) stop 8 + var%x = 42 + var2%x = 43 + !$omp end target + !$omp target exit data map(from: var, var2) + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + + allocate(var%vt, var2%vt) + allocate(var%vt%A(-1:3), var2%vt%A(0:4)) + + !$omp target enter data map(alloc: var, var2) + !$omp target + if (.not. allocated(Var2)) stop 11 + if (.not. allocated(Var%x)) stop 12 + if (.not. allocated(Var2%x)) stop 13 + if (.not. allocated(Var%vt)) stop 14 + if (.not. allocated(Var2%vt)) stop 15 + if (.not. allocated(Var%vt%a)) stop 16 + if (.not. allocated(Var2%vt%a)) stop 17 + var%x = 42 + var2%x = 43 + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) stop 4 + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) stop 5 + var%vt%A = [1,2,3,4,5] + var2%vt%A = [11,22,33,44,55] + !$omp end target + !$omp target exit data map(from: var, var2) + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (.not. allocated(Var%vt)) error stop + if (.not. allocated(Var2%vt)) error stop + if (.not. allocated(Var%vt%a)) error stop + if (.not. allocated(Var2%vt%a)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) error stop + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) error stop + if (any(var%vt%A /= [1,2,3,4,5])) error stop + if (any(var2%vt%A /= [11,22,33,44,55])) error stop + end subroutine test2_alloc + + + subroutine test_alloc_target() + type(t) :: var + type(t), allocatable :: var2 + + allocate(var2) + allocate(var%A(4), var2%A(5)) + + !$omp target map(alloc: var, var2) + if (.not. allocated(Var2)) stop 1 + if (.not. allocated(Var%A)) stop 2 + if (.not. allocated(Var2%A)) stop 3 + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) stop 4 + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) stop 5 + var%A = [1,2,3,4] + var2%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%A)) error stop + if (.not. allocated(Var2%A)) error stop + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) error stop + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) error stop + end subroutine test_alloc_target + + subroutine test2_alloc_target() + type(t2) :: var + type(t2), allocatable :: var2 + + allocate(var2) + allocate(var%x, var2%x) + + !$omp target map(alloc: var, var2) + if (.not. allocated(Var2)) stop 6 + if (.not. allocated(Var%x)) stop 7 + if (.not. allocated(Var2%x)) stop 8 + var%x = 42 + var2%x = 43 + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + + allocate(var%vt, var2%vt) + allocate(var%vt%A(-1:3), var2%vt%A(0:4)) + + !$omp target map(alloc: var, var2) + if (.not. allocated(Var2)) stop 11 + if (.not. allocated(Var%x)) stop 12 + if (.not. allocated(Var2%x)) stop 13 + if (.not. allocated(Var%vt)) stop 14 + if (.not. allocated(Var2%vt)) stop 15 + if (.not. allocated(Var%vt%a)) stop 16 + if (.not. allocated(Var2%vt%a)) stop 17 + var%x = 42 + var2%x = 43 + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) stop 4 + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) stop 5 + var%vt%A = [1,2,3,4,5] + var2%vt%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (.not. allocated(Var%vt)) error stop + if (.not. allocated(Var2%vt)) error stop + if (.not. allocated(Var%vt%a)) error stop + if (.not. allocated(Var2%vt%a)) error stop + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) error stop + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) error stop + end subroutine test2_alloc_target + + + + subroutine test_from() + type(t) :: var + type(t), allocatable :: var2 + + allocate(var2) + allocate(var%A(4), var2%A(5)) + + !$omp target map(from: var, var2) + if (.not. allocated(Var2)) stop 1 + if (.not. allocated(Var%A)) stop 2 + if (.not. allocated(Var2%A)) stop 3 + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) stop 4 + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) stop 5 + var%A = [1,2,3,4] + var2%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%A)) error stop + if (.not. allocated(Var2%A)) error stop + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) error stop + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) error stop + if (any(var%A /= [1,2,3,4])) error stop + if (any(var2%A /= [11,22,33,44,55])) error stop + end subroutine test_from + + subroutine test2_from() + type(t2) :: var + type(t2), allocatable :: var2 + + allocate(var2) + allocate(var%x, var2%x) + + !$omp target map(from: var, var2) + if (.not. allocated(Var2)) stop 6 + if (.not. allocated(Var%x)) stop 7 + if (.not. allocated(Var2%x)) stop 8 + var%x = 42 + var2%x = 43 + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + + allocate(var%vt, var2%vt) + allocate(var%vt%A(-1:3), var2%vt%A(0:4)) + + !$omp target map(from: var, var2) + if (.not. allocated(Var2)) stop 11 + if (.not. allocated(Var%x)) stop 12 + if (.not. allocated(Var2%x)) stop 13 + if (.not. allocated(Var%vt)) stop 14 + if (.not. allocated(Var2%vt)) stop 15 + if (.not. allocated(Var%vt%a)) stop 16 + if (.not. allocated(Var2%vt%a)) stop 17 + var%x = 42 + var2%x = 43 + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) stop 4 + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) stop 5 + var%vt%A = [1,2,3,4,5] + var2%vt%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (.not. allocated(Var%vt)) error stop + if (.not. allocated(Var2%vt)) error stop + if (.not. allocated(Var%vt%a)) error stop + if (.not. allocated(Var2%vt%a)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) error stop + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) error stop + if (any(var%vt%A /= [1,2,3,4,5])) error stop + if (any(var2%vt%A /= [11,22,33,44,55])) error stop + end subroutine test2_from + +end module m + +use m + implicit none (type, external) + call test_alloc + call test2_alloc + call test_alloc_target + call test2_alloc_target + + call test_from + call test2_from +end