This patch (by Cesar) changes the way that mapping of firstprivate scalars works for OpenACC. For scalars whose type has a size equal to or smaller than the size of a pointer, rather than copying the value of the scalar to the target device and having a separate mapping for a pointer to the copied value, a single "pointer" is mapped whose bits are a type-punned representation of the value itself.
This is a performance optimisation: the idea, IIUC, is that it is a good idea to avoid having all launched compute resources contend for a single memory location -- the pointed-to cell containing the scalar on the device, in this case. Cesar talks about speedups obtained here (for an earlier version of the patch): https://gcc.gnu.org/ml/gcc-patches/2017-01/msg02171.html The patch implies an API change for the libgomp plugin, in that it must now understand that NULL device pointers correspond to host pointers that are actually type-punned scalars. Tested with offloading to NVPTX and bootstrapped. OK for mainline? Julian ChangeLog 2018-09-20 Cesar Philippidis <ce...@codesourcery.com> Julian Brown <jul...@codesourcery.com> gcc/ * omp-low.c (maybe_lookup_field_in_outer_ctx): New function. (convert_to_firstprivate_int): New function. (convert_from_firstprivate_int): New function. (lower_omp_target): Enable GOMP_MAP_FIRSTPRIVATE_INT in OpenACC. libgomp/ * oacc-parallel.c (GOACC_parallel_keyed): Handle GOMP_MAP_FIRSTPRIVATE_INT host addresses. * plugin/plugin-nvptx.c (nvptx_exec): Handle GOMP_MAP_FIRSTPRIVATE_INT host addresses. * testsuite/libgomp.oacc-c++/firstprivate-int.C: New test. * testsuite/libgomp.oacc-c-c++-common/firstprivate-int.c: New test. * testsuite/libgomp.oacc-fortran/firstprivate-int.f90: New test.
>From 1263a1bef1780fd015f9ee937c2b2df2717f1603 Mon Sep 17 00:00:00 2001 From: Julian Brown <jul...@codesourcery.com> Date: Mon, 17 Sep 2018 19:38:21 -0700 Subject: [PATCH 1/2] Enable GOMP_MAP_FIRSTPRIVATE_INT for OpenACC gcc/ * omp-low.c (maybe_lookup_field_in_outer_ctx): New function. (convert_to_firstprivate_int): New function. (convert_from_firstprivate_int): New function. (lower_omp_target): Enable GOMP_MAP_FIRSTPRIVATE_INT in OpenACC. libgomp/ * oacc-parallel.c (GOACC_parallel_keyed): Handle GOMP_MAP_FIRSTPRIVATE_INT host addresses. * plugin/plugin-nvptx.c (nvptx_exec): Handle GOMP_MAP_FIRSTPRIVATE_INT host addresses. * testsuite/libgomp.oacc-c++/firstprivate-int.C: New test. * testsuite/libgomp.oacc-c-c++-common/firstprivate-int.c: New test. * testsuite/libgomp.oacc-fortran/firstprivate-int.f90: New test. --- gcc/omp-low.c | 171 +++++++++++++++-- libgomp/oacc-parallel.c | 7 +- libgomp/plugin/plugin-nvptx.c | 2 +- .../testsuite/libgomp.oacc-c++/firstprivate-int.C | 83 +++++++++ .../libgomp.oacc-c-c++-common/firstprivate-int.c | 67 +++++++ .../libgomp.oacc-fortran/firstprivate-int.f90 | 205 +++++++++++++++++++++ 6 files changed, 518 insertions(+), 17 deletions(-) create mode 100644 libgomp/testsuite/libgomp.oacc-c++/firstprivate-int.C create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-int.c create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/firstprivate-int.f90 diff --git a/gcc/omp-low.c b/gcc/omp-low.c index fdabf67..5fc4a66 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -3264,6 +3264,19 @@ maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) return t ? t : decl; } +/* Returns true if DECL is present inside a field that encloses CTX. */ + +static bool +maybe_lookup_field_in_outer_ctx (tree decl, omp_context *ctx) +{ + omp_context *up; + + for (up = ctx->outer; up; up = up->outer) + if (maybe_lookup_field (decl, up)) + return true; + + return false; +} /* Construct the initialization value for reduction operation OP. */ @@ -7470,6 +7483,88 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) } } +/* Helper function for lower_omp_target. Converts VAR to something + that can be represented by a POINTER_SIZED_INT_NODE. Any new + instructions are appended to GS. This is primarily used to + optimize firstprivate variables, so that small types (less + precision than POINTER_SIZE) do not require additional data + mappings. */ + +static tree +convert_to_firstprivate_int (tree var, gimple_seq *gs) +{ + tree type = TREE_TYPE (var), new_type = NULL_TREE; + tree tmp = NULL_TREE; + + if (omp_is_reference (var)) + type = TREE_TYPE (type); + + if (INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) + { + if (omp_is_reference (var)) + { + tmp = create_tmp_var (type); + gimplify_assign (tmp, build_simple_mem_ref (var), gs); + var = tmp; + } + + return fold_convert (pointer_sized_int_node, var); + } + + gcc_assert (tree_to_uhwi (TYPE_SIZE (type)) <= POINTER_SIZE); + + new_type = lang_hooks.types.type_for_size (tree_to_uhwi (TYPE_SIZE (type)), + true); + + if (omp_is_reference (var)) + { + tmp = create_tmp_var (type); + gimplify_assign (tmp, build_simple_mem_ref (var), gs); + var = tmp; + } + + tmp = create_tmp_var (new_type); + var = fold_build1 (VIEW_CONVERT_EXPR, new_type, var); + gimplify_assign (tmp, var, gs); + + return fold_convert (pointer_sized_int_node, tmp); +} + +/* Like convert_to_firstprivate_int, but restore the original type. */ + +static tree +convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) +{ + tree type = TREE_TYPE (var); + tree new_type = NULL_TREE; + tree tmp = NULL_TREE; + + gcc_assert (TREE_CODE (var) == MEM_REF); + var = TREE_OPERAND (var, 0); + + if (INTEGRAL_TYPE_P (var) || POINTER_TYPE_P (type)) + return fold_convert (type, var); + + gcc_assert (tree_to_uhwi (TYPE_SIZE (type)) <= POINTER_SIZE); + + new_type = lang_hooks.types.type_for_size (tree_to_uhwi (TYPE_SIZE (type)), + true); + + tmp = create_tmp_var (new_type); + var = fold_convert (new_type, var); + gimplify_assign (tmp, var, gs); + var = fold_build1 (VIEW_CONVERT_EXPR, type, tmp); + + if (is_ref) + { + tmp = create_tmp_var (build_pointer_type (type)); + gimplify_assign (tmp, build_fold_addr_expr (var), gs); + var = tmp; + } + + return var; +} + /* Lower the GIMPLE_OMP_TARGET in the current statement in GSI_P. CTX holds context information for the directive. */ @@ -7631,25 +7726,46 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (offloaded && !(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_IN_REDUCTION (c))) { - x = build_receiver_ref (var, true, ctx); + tree var_type = TREE_TYPE (var); tree new_var = lookup_decl (var, ctx); + tree inner_type = omp_is_reference (new_var) + ? TREE_TYPE (var_type) : var_type; + + x = build_receiver_ref (var, true, ctx); + + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE + && (TREE_CODE (inner_type) == REAL_TYPE + || (!omp_is_reference (var) + && INTEGRAL_TYPE_P (inner_type)) + || TREE_CODE (inner_type) == INTEGER_TYPE) + && tree_to_uhwi (TYPE_SIZE (inner_type)) <= POINTER_SIZE + && TYPE_PRECISION (inner_type) != 0 + && !maybe_lookup_field_in_outer_ctx (var, ctx)) + { + gcc_assert (is_gimple_omp_oacc (ctx->stmt)); + x = convert_from_firstprivate_int (x, omp_is_reference (var), + &fplist); + gimplify_assign (new_var, x, &fplist); + map_cnt++; + break; + } if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c) - && TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE) + && TREE_CODE (var_type) == ARRAY_TYPE) x = build_simple_mem_ref (x); + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); if (omp_is_reference (new_var) - && TREE_CODE (TREE_TYPE (new_var)) != POINTER_TYPE) + && TREE_CODE (var_type) != POINTER_TYPE) { /* Create a local object to hold the instance value. */ - tree type = TREE_TYPE (TREE_TYPE (new_var)); const char *id = IDENTIFIER_POINTER (DECL_NAME (new_var)); - tree inst = create_tmp_var (type, id); + tree inst = create_tmp_var (TREE_TYPE (var_type), id); gimplify_assign (inst, fold_indirect_ref (x), &fplist); x = build_fold_addr_expr (inst); } @@ -7804,6 +7920,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) { tree ovar, nc, s, purpose, var, x, type; unsigned int talign; + bool oacc_firstprivate_int; default: break; @@ -7812,6 +7929,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) case OMP_CLAUSE_TO: case OMP_CLAUSE_FROM: oacc_firstprivate_map: + oacc_firstprivate_int = false; nc = c; ovar = OMP_CLAUSE_DECL (c); if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP @@ -7877,8 +7995,25 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { - gcc_assert (is_gimple_omp_oacc (ctx->stmt)); - if (!omp_is_reference (var)) + gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); + tree type = TREE_TYPE (var); + tree inner_type = omp_is_reference (var) + ? TREE_TYPE (type) : type; + if ((TREE_CODE (inner_type) == REAL_TYPE + || (!omp_is_reference (var) + && INTEGRAL_TYPE_P (inner_type)) + || TREE_CODE (inner_type) == INTEGER_TYPE) + && tree_to_uhwi (TYPE_SIZE (inner_type)) <= POINTER_SIZE + && TYPE_PRECISION (inner_type) != 0 + && !maybe_lookup_field_in_outer_ctx (var, ctx)) + { + oacc_firstprivate_int = true; + if (is_gimple_reg (var) + && OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c)) + TREE_NO_WARNING (var) = 1; + var = convert_to_firstprivate_int (var, &ilist); + } + else if (!omp_is_reference (var)) { if (is_gimple_reg (var) && OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c)) @@ -7930,10 +8065,15 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); - s = TREE_TYPE (ovar); - if (TREE_CODE (s) == REFERENCE_TYPE) - s = TREE_TYPE (s); - s = TYPE_SIZE_UNIT (s); + if (oacc_firstprivate_int) + s = size_int (0); + else + { + s = TREE_TYPE (ovar); + if (TREE_CODE (s) == REFERENCE_TYPE) + s = TREE_TYPE (s); + s = TYPE_SIZE_UNIT (s); + } } else s = OMP_CLAUSE_SIZE (c); @@ -7983,7 +8123,10 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) break; case OMP_CLAUSE_FIRSTPRIVATE: gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); - tkind = GOMP_MAP_TO; + if (oacc_firstprivate_int) + tkind = GOMP_MAP_FIRSTPRIVATE_INT; + else + tkind = GOMP_MAP_TO; tkind_zero = tkind; break; case OMP_CLAUSE_TO: @@ -8039,7 +8182,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (omp_is_reference (ovar)) type = TREE_TYPE (type); if ((INTEGRAL_TYPE_P (type) - && TYPE_PRECISION (type) <= POINTER_SIZE) + && tree_to_uhwi (TYPE_SIZE (type)) <= POINTER_SIZE) || TREE_CODE (type) == POINTER_TYPE) { tkind = GOMP_MAP_FIRSTPRIVATE_INT; @@ -8194,7 +8337,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (omp_is_reference (var)) type = TREE_TYPE (type); if ((INTEGRAL_TYPE_P (type) - && TYPE_PRECISION (type) <= POINTER_SIZE) + && tree_to_uhwi (TYPE_SIZE (type)) <= POINTER_SIZE) || TREE_CODE (type) == POINTER_TYPE) { x = build_receiver_ref (var, false, ctx); diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 13ee1c9..070c5dc 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -231,8 +231,11 @@ GOACC_parallel_keyed (int device, void (*fn) (void *), devaddrs = gomp_alloca (sizeof (void *) * mapnum); for (i = 0; i < mapnum; i++) - devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start - + tgt->list[i].key->tgt_offset); + if (tgt->list[i].key != NULL) + devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start + + tgt->list[i].key->tgt_offset); + else + devaddrs[i] = NULL; acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, async, dims, tgt); diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 6492e5f..a6e20bf 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -1314,7 +1314,7 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, /* Copy the array of arguments to the mapped page. */ hp = alloca(sizeof(void *) * mapnum); for (i = 0; i < mapnum; i++) - ((void **) hp)[i] = devaddrs[i]; + ((void **) hp)[i] = devaddrs[i] != 0 ? devaddrs[i] : hostaddrs[i]; /* Copy the (device) pointers to arguments to the device */ CUDA_CALL_ASSERT (cuMemcpyHtoD, dp, hp, diff --git a/libgomp/testsuite/libgomp.oacc-c++/firstprivate-int.C b/libgomp/testsuite/libgomp.oacc-c++/firstprivate-int.C new file mode 100644 index 0000000..c7d90d9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/firstprivate-int.C @@ -0,0 +1,83 @@ +/* Verify the GOMP_MAP_FIRSTPRIVATE_INT optimziation on various types. + This test is similer to the test in libgomp.oacc-c-c++-common, but + it focuses on reference types. */ + +#include <assert.h> +#include <stdint.h> +#include <complex.h> + +void test_ref (int8_t &i8i, int8_t &i8o, int16_t &i16i, int16_t &i16o, + int32_t &i32i, int32_t &i32o, int64_t &i64i, int64_t &i64o, + uint8_t &u8i, uint8_t &u8o, uint16_t &u16i, uint16_t &u16o, + uint32_t &u32i, uint32_t &u32o, uint64_t &u64i, uint64_t &u64o, + float &r32i, float &r32o, double &r64i, double &r64o, + int _Complex &cii, int _Complex &cio, + float _Complex &cfi, float _Complex &cfo, + double _Complex &cdi, double _Complex &cdo) +{ +#pragma acc parallel firstprivate (i8i,i16i,i32i,i64i,u8i,u16i,u32i,u64i) \ + firstprivate(r32i,r64i,cii,cfi,cdi) copyout(i8o,i16o,i32o,i64o) \ + copyout(u8o,u16o,u32o,u64o,r32o,r64o,cio,cfo,cdo) num_gangs(1) + { + i8o = i8i; + i16o = i16i; + i32o = i32i; + i64o = i64i; + + u8o = u8i; + u16o = u16i; + u32o = u32i; + u64o = u64i; + + r32o = r32i; + r64o = r64i; + + cio = cii; + cfo = cfi; + cdo = cdi; + } +} + +int +main () +{ + int8_t i8i = -1, i8o; + int16_t i16i = -2, i16o; + int32_t i32i = -3, i32o; + int64_t i64i = -4, i64o; + + uint8_t u8i = 1, u8o; + uint16_t u16i = 2, u16o; + uint32_t u32i = 3, u32o; + uint64_t u64i = 4, u64o; + + float r32i = .5, r32o; + double r64i = .25, r64o; + + int _Complex cii = 2, cio; + float _Complex cfi = 4, cfo; + double _Complex cdi = 8, cdo; + + test_ref (i8i, i8o, i16i, i16o, i32i, i32o, i64i, i64o, u8i, u8o, u16i, + u16o, u32i, u32o, u64i, u64o, r32i, r32o, r64i, r64o, cii, cio, + cfi, cfo, cdi, cdo); + + assert (i8o == i8i); + assert (i16o == i16i); + assert (i32o == i32i); + assert (i64o == i64i); + + assert (u8o == u8i); + assert (u16o == u16i); + assert (u32o == u32i); + assert (u64o == u64i); + + assert (r32o == r32i); + assert (r64o == r64i); + + assert (cio == cii); + assert (cfo == cfi); + assert (cdo == cdi); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-int.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-int.c new file mode 100644 index 0000000..8abb610 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-int.c @@ -0,0 +1,67 @@ +/* Verify the GOMP_MAP_FIRSTPRIVATE_INT optimziation on various types. */ + +#include <assert.h> +#include <stdint.h> +#include <complex.h> + +int +main () +{ + int8_t i8i = -1, i8o; + int16_t i16i = -2, i16o; + int32_t i32i = -3, i32o; + int64_t i64i = -4, i64o; + + uint8_t u8i = 1, u8o; + uint16_t u16i = 2, u16o; + uint32_t u32i = 3, u32o; + uint64_t u64i = 4, u64o; + + float r32i = .5, r32o; + double r64i = .25, r64o; + + int _Complex cii = 2, cio; + float _Complex cfi = 4, cfo; + double _Complex cdi = 8, cdo; + +#pragma acc parallel firstprivate (i8i,i16i,i32i,i64i,u8i,u16i,u32i,u64i) \ + firstprivate(r32i,r64i,cii,cfi,cdi) copyout(i8o,i16o,i32o,i64o) \ + copyout(u8o,u16o,u32o,u64o,r32o,r64o,cio,cfo,cdo) num_gangs(1) + { + i8o = i8i; + i16o = i16i; + i32o = i32i; + i64o = i64i; + + u8o = u8i; + u16o = u16i; + u32o = u32i; + u64o = u64i; + + r32o = r32i; + r64o = r64i; + + cio = cii; + cfo = cfi; + cdo = cdi; + } + + assert (i8o == i8i); + assert (i16o == i16i); + assert (i32o == i32i); + assert (i64o == i64i); + + assert (u8o == u8i); + assert (u16o == u16i); + assert (u32o == u32i); + assert (u64o == u64i); + + assert (r32o == r32i); + assert (r64o == r64i); + + assert (cio == cii); + assert (cfo == cfi); + assert (cdo == cdi); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/firstprivate-int.f90 b/libgomp/testsuite/libgomp.oacc-fortran/firstprivate-int.f90 new file mode 100644 index 0000000..963b340 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/firstprivate-int.f90 @@ -0,0 +1,205 @@ +! Verify the GOMP_MAP_FIRSTPRIVATE_INT optimziation on various types. + +! { dg-do run } + +program test + implicit none + + integer (kind=1) :: i1i, i1o + integer (kind=2) :: i2i, i2o + integer (kind=4) :: i4i, i4o + integer (kind=8) :: i8i, i8o + integer (kind=16) :: i16i, i16o + + logical (kind=1) :: l1i, l1o + logical (kind=2) :: l2i, l2o + logical (kind=4) :: l4i, l4o + logical (kind=8) :: l8i, l8o + logical (kind=16) :: l16i, l16o + + real (kind=4) :: r4i, r4o + real (kind=8) :: r8i, r8o + + complex (kind=4) :: c4i, c4o + complex (kind=8) :: c8i, c8o + + character (kind=1) :: ch1i, ch1o + character (kind=4) :: ch4i, ch4o + + i1i = 1 + i2i = 2 + i4i = 3 + i8i = 4 + i16i = 5 + + l1i = .true. + l2i = .false. + l4i = .true. + l8i = .true. + l16i = .false. + + r4i = .5 + r8i = .25 + + c4i = (2, -2) + c8i = (4, -4) + + ch1i = "a" + ch4i = "b" + + !$acc parallel firstprivate(i1i, i2i, i4i, i8i, i16i) & + !$acc copyout(i1o, i2o, i4o, i8o, i16o) & + !$acc firstprivate(l1i, l2i, l4i, l8i, l16i) & + !$acc copyout(l1o, l2o, l4o, l8o, l16o) & + !$acc firstprivate(r4i, r8i) copyout(r4o, r8o) & + !$acc firstprivate(c4i, c8i) copyout(c4o, c8o) & + !$acc firstprivate(ch1i, ch4i) & + !$acc copyout(ch1o, ch4o) + i1o = i1i + i2o = i2i + i4o = i4i + i8o = i8i + i16o = i16i + + l1o = l1i + l2o = l2i + l4o = l4i + l8o = l8i + l16o = l16i + + r4o = r4i + r8o = r8i + + c4o = c4i + c8o = c8i + + ch1o = ch1i + ch4o = ch4i + !$acc end parallel + + if (i1i /= i1o) call abort + if (i2i /= i2o) call abort + if (i4i /= i4o) call abort + if (i8i /= i8o) call abort + if (i16i /= i16o) call abort + + if (l1i .neqv. l1o) call abort + if (l2i .neqv. l2o) call abort + if (l4i .neqv. l4o) call abort + if (l8i .neqv. l8o) call abort + if (l16i .neqv. l16o) call abort + + if (r4i /= r4o) call abort + if (r8i /= r8o) call abort + + if (c4i /= c4o) call abort + if (c8i /= c8o) call abort + + if (ch1i /= ch1o) call abort + if (ch4i /= ch4o) call abort + + call subtest(i1i, i2i, i4i, i8i, i16i, i1o, i2o, i4o, i8o, i16o, & + l1i, l2i, l4i, l8i, l16i, l1o, l2o, l4o, l8o, l16o, & + r4i, r8i, r4o, r8o, c4i, c8i, c4o, c8o, & + ch1i, ch4i, ch1o, ch4o) +end program test + +subroutine subtest(i1i, i2i, i4i, i8i, i16i, i1o, i2o, i4o, i8o, i16o, & + l1i, l2i, l4i, l8i, l16i, l1o, l2o, l4o, l8o, l16o, & + r4i, r8i, r4o, r8o, c4i, c8i, c4o, c8o, & + ch1i, ch4i, ch1o, ch4o) + implicit none + + integer (kind=1) :: i1i, i1o + integer (kind=2) :: i2i, i2o + integer (kind=4) :: i4i, i4o + integer (kind=8) :: i8i, i8o + integer (kind=16) :: i16i, i16o + + logical (kind=1) :: l1i, l1o + logical (kind=2) :: l2i, l2o + logical (kind=4) :: l4i, l4o + logical (kind=8) :: l8i, l8o + logical (kind=16) :: l16i, l16o + + real (kind=4) :: r4i, r4o + real (kind=8) :: r8i, r8o + + complex (kind=4) :: c4i, c4o + complex (kind=8) :: c8i, c8o + + character (kind=1) :: ch1i, ch1o + character (kind=4) :: ch4i, ch4o + + i1i = -i1i + i2i = -i2i + i4i = -i4i + i8i = -i8i + i16i = -i16i + + l1i = .not. l1i + l2i = .not. l2i + l4i = .not. l4i + l8i = .not. l8i + l16i = .not. l16i + + r4i = -r4i + r8i = -r8i + + c4i = -c4i + c8i = -c8i + + ch1i = "z" + ch4i = "y" + + !$acc parallel firstprivate(i1i, i2i, i4i, i8i, i16i) & + !$acc copyout(i1o, i2o, i4o, i8o, i16o) & + !$acc firstprivate(l1i, l2i, l4i, l8i, l16i) & + !$acc copyout(l1o, l2o, l4o, l8o, l16o) & + !$acc firstprivate(r4i, r8i) copyout(r4o, r8o) & + !$acc firstprivate(c4i, c8i) copyout(c4o, c8o) & + !$acc firstprivate(ch1i, ch4i) & + !$acc copyout(ch1o, ch4o) + i1o = i1i + i2o = i2i + i4o = i4i + i8o = i8i + i16o = i16i + + l1o = l1i + l2o = l2i + l4o = l4i + l8o = l8i + l16o = l16i + + r4o = r4i + r8o = r8i + + c4o = c4i + c8o = c8i + + ch1o = ch1i + ch4o = ch4i + !$acc end parallel + + if (i1i /= i1o) call abort + if (i2i /= i2o) call abort + if (i4i /= i4o) call abort + if (i8i /= i8o) call abort + if (i16i /= i16o) call abort + + if (l1i .neqv. l1o) call abort + if (l2i .neqv. l2o) call abort + if (l4i .neqv. l4o) call abort + if (l8i .neqv. l8o) call abort + if (l16i .neqv. l16o) call abort + + if (r4i /= r4o) call abort + if (r8i /= r8o) call abort + + if (c4i /= c4o) call abort + if (c8i /= c8o) call abort + + if (ch1i /= ch1o) call abort + if (ch4i /= ch4o) call abort +end subroutine subtest -- 1.8.1.1