This patch series implements OpenMP allocators for low-latency memory on nvptx, unified shared memory on both nvptx and amdgcn, and generic pinned memory support for all Linux hosts. An nvptx-specific implementation using CUDA pinned memory is planned for the future, as is low-latency memory on amdgcn.
Patches 01 to 14 are reposts of previously submitted patches, now forward-ported to the current master branch and with the various follow-up patches folded in. Where this series conflicts with the new memkind implementation, memkind takes precedence (but there is currently no way to implement memory that is both high-bandwidth and pinned anyway).

Patches 15 to 17 are new work. I can probably approve these myself, but they can't be committed until the rest of the series is approved.

Andrew

Andrew Stubbs (11):
  libgomp, nvptx: low-latency memory allocator
  libgomp: pinned memory
  libgomp, openmp: Add ompx_pinned_mem_alloc
  openmp, nvptx: low-lat memory access traits
  openmp, nvptx: ompx_unified_shared_mem_alloc
  openmp: Add -foffload-memory
  openmp: allow requires unified_shared_memory
  openmp: -foffload-memory=pinned
  amdgcn: Support XNACK mode
  amdgcn, openmp: Auto-detect USM mode and set HSA_XNACK
  amdgcn: libgomp plugin USM implementation

Hafiz Abid Qadeer (6):
  openmp: Use libgomp memory allocation functions with unified shared memory.
  Add parsing support for allocate directive (OpenMP 5.0)
  Translate allocate directive (OpenMP 5.0).
  Handle cleanup of omp allocated variables (OpenMP 5.0).
  Gimplify allocate directive (OpenMP 5.0).
  Lower allocate directive (OpenMP 5.0).

gcc/c/c-parser.cc | 22 +- gcc/common.opt | 16 + gcc/config/gcn/gcn-hsa.h | 3 +- gcc/config/gcn/gcn-opts.h | 10 +- gcc/config/gcn/gcn-valu.md | 29 +- gcc/config/gcn/gcn.cc | 62 ++- gcc/config/gcn/gcn.md | 113 +++-- gcc/config/gcn/gcn.opt | 18 +- gcc/config/gcn/mkoffload.cc | 56 ++- gcc/coretypes.h | 7 + gcc/cp/parser.cc | 22 +- gcc/doc/gimple.texi | 38 +- gcc/doc/invoke.texi | 16 +- gcc/fortran/dump-parse-tree.cc | 3 + gcc/fortran/gfortran.h | 5 +- gcc/fortran/match.h | 1 + gcc/fortran/openmp.cc | 242 ++++++++++- gcc/fortran/parse.cc | 10 +- gcc/fortran/resolve.cc | 1 + gcc/fortran/st.cc | 1 + gcc/fortran/trans-decl.cc | 20 + gcc/fortran/trans-openmp.cc | 50 +++ gcc/fortran/trans.cc | 1 + gcc/gimple-pretty-print.cc | 37 ++ gcc/gimple.cc | 12 + gcc/gimple.def | 6 + gcc/gimple.h | 60 ++- gcc/gimplify.cc | 19 + gcc/gsstruct.def | 1 + gcc/omp-builtins.def | 3 + gcc/omp-low.cc | 383 +++++++++++++++++ gcc/passes.def | 1 + .../c-c++-common/gomp/alloc-pinned-1.c | 28 ++ gcc/testsuite/c-c++-common/gomp/usm-1.c | 4 + gcc/testsuite/c-c++-common/gomp/usm-2.c | 46 +++ gcc/testsuite/c-c++-common/gomp/usm-3.c | 44 ++ gcc/testsuite/c-c++-common/gomp/usm-4.c | 4 + gcc/testsuite/g++.dg/gomp/usm-1.C | 32 ++ gcc/testsuite/g++.dg/gomp/usm-2.C | 30 ++ gcc/testsuite/g++.dg/gomp/usm-3.C | 38 ++ gcc/testsuite/gfortran.dg/gomp/allocate-4.f90 | 112 +++++ gcc/testsuite/gfortran.dg/gomp/allocate-5.f90 | 73 ++++ gcc/testsuite/gfortran.dg/gomp/allocate-6.f90 | 84 ++++ gcc/testsuite/gfortran.dg/gomp/allocate-7.f90 | 13 + gcc/testsuite/gfortran.dg/gomp/allocate-8.f90 | 15 + gcc/testsuite/gfortran.dg/gomp/usm-1.f90 | 6 + gcc/testsuite/gfortran.dg/gomp/usm-2.f90 | 16 + gcc/testsuite/gfortran.dg/gomp/usm-3.f90 | 13 + gcc/testsuite/gfortran.dg/gomp/usm-4.f90 | 6 + gcc/tree-core.h | 9 + gcc/tree-pass.h | 1 + gcc/tree-pretty-print.cc | 23 ++ gcc/tree.cc | 1 + gcc/tree.def | 4 + gcc/tree.h | 15 + include/cuda/cuda.h | 12 + libgomp/allocator.c | 304 ++++++++++---- libgomp/config/linux/allocator.c | 137 
+++++++ libgomp/config/nvptx/allocator.c | 387 ++++++++++++++++++ libgomp/config/nvptx/team.c | 28 ++ libgomp/libgomp-plugin.h | 3 + libgomp/libgomp.h | 6 + libgomp/libgomp.map | 1 + libgomp/omp.h.in | 5 + libgomp/omp_lib.f90.in | 10 + libgomp/plugin/cuda-lib.def | 2 + libgomp/plugin/plugin-gcn.c | 104 ++++- libgomp/plugin/plugin-nvptx.c | 70 +++- libgomp/target.c | 66 +++ libgomp/testsuite/lib/libgomp.exp | 22 + libgomp/testsuite/libgomp.c++/usm-1.C | 54 +++ .../libgomp.c-c++-common/requires-1.c | 1 + libgomp/testsuite/libgomp.c/alloc-pinned-1.c | 95 +++++ libgomp/testsuite/libgomp.c/alloc-pinned-2.c | 101 +++++ libgomp/testsuite/libgomp.c/alloc-pinned-3.c | 130 ++++++ libgomp/testsuite/libgomp.c/alloc-pinned-4.c | 132 ++++++ libgomp/testsuite/libgomp.c/alloc-pinned-5.c | 90 ++++ libgomp/testsuite/libgomp.c/alloc-pinned-6.c | 101 +++++ libgomp/testsuite/libgomp.c/alloc-pinned-7.c | 63 +++ libgomp/testsuite/libgomp.c/allocators-1.c | 56 +++ libgomp/testsuite/libgomp.c/allocators-2.c | 64 +++ libgomp/testsuite/libgomp.c/allocators-3.c | 42 ++ libgomp/testsuite/libgomp.c/allocators-4.c | 197 +++++++++ libgomp/testsuite/libgomp.c/allocators-5.c | 63 +++ libgomp/testsuite/libgomp.c/allocators-6.c | 118 ++++++ libgomp/testsuite/libgomp.c/allocators-7.c | 68 +++ libgomp/testsuite/libgomp.c/usm-1.c | 25 ++ libgomp/testsuite/libgomp.c/usm-2.c | 33 ++ libgomp/testsuite/libgomp.c/usm-3.c | 36 ++ libgomp/testsuite/libgomp.c/usm-4.c | 37 ++ libgomp/testsuite/libgomp.c/usm-5.c | 28 ++ libgomp/testsuite/libgomp.c/usm-6.c | 92 +++++ .../libgomp.fortran/alloc-pinned-1.f90 | 16 + .../testsuite/libgomp.fortran/allocate-2.f90 | 48 +++ 94 files changed, 4535 insertions(+), 197 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/gomp/alloc-pinned-1.c create mode 100644 gcc/testsuite/c-c++-common/gomp/usm-1.c create mode 100644 gcc/testsuite/c-c++-common/gomp/usm-2.c create mode 100644 gcc/testsuite/c-c++-common/gomp/usm-3.c create mode 100644 
gcc/testsuite/c-c++-common/gomp/usm-4.c create mode 100644 gcc/testsuite/g++.dg/gomp/usm-1.C create mode 100644 gcc/testsuite/g++.dg/gomp/usm-2.C create mode 100644 gcc/testsuite/g++.dg/gomp/usm-3.C create mode 100644 gcc/testsuite/gfortran.dg/gomp/allocate-4.f90 create mode 100644 gcc/testsuite/gfortran.dg/gomp/allocate-5.f90 create mode 100644 gcc/testsuite/gfortran.dg/gomp/allocate-6.f90 create mode 100644 gcc/testsuite/gfortran.dg/gomp/allocate-7.f90 create mode 100644 gcc/testsuite/gfortran.dg/gomp/allocate-8.f90 create mode 100644 gcc/testsuite/gfortran.dg/gomp/usm-1.f90 create mode 100644 gcc/testsuite/gfortran.dg/gomp/usm-2.f90 create mode 100644 gcc/testsuite/gfortran.dg/gomp/usm-3.f90 create mode 100644 gcc/testsuite/gfortran.dg/gomp/usm-4.f90 create mode 100644 libgomp/config/nvptx/allocator.c create mode 100644 libgomp/testsuite/libgomp.c++/usm-1.C create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-1.c create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-2.c create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-3.c create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-4.c create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-5.c create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-6.c create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-7.c create mode 100644 libgomp/testsuite/libgomp.c/allocators-1.c create mode 100644 libgomp/testsuite/libgomp.c/allocators-2.c create mode 100644 libgomp/testsuite/libgomp.c/allocators-3.c create mode 100644 libgomp/testsuite/libgomp.c/allocators-4.c create mode 100644 libgomp/testsuite/libgomp.c/allocators-5.c create mode 100644 libgomp/testsuite/libgomp.c/allocators-6.c create mode 100644 libgomp/testsuite/libgomp.c/allocators-7.c create mode 100644 libgomp/testsuite/libgomp.c/usm-1.c create mode 100644 libgomp/testsuite/libgomp.c/usm-2.c create mode 100644 libgomp/testsuite/libgomp.c/usm-3.c create mode 100644 libgomp/testsuite/libgomp.c/usm-4.c create mode 100644 
libgomp/testsuite/libgomp.c/usm-5.c create mode 100644 libgomp/testsuite/libgomp.c/usm-6.c create mode 100644 libgomp/testsuite/libgomp.fortran/alloc-pinned-1.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/allocate-2.f90 -- 2.33.0