https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/101216
>From 825e6efbbe20041b2b1591617f32abc12a0b42ff Mon Sep 17 00:00:00 2001 From: Valentin Clement <clement...@gmail.com> Date: Fri, 12 Jul 2024 15:20:12 -0700 Subject: [PATCH 1/4] [flang][cuda] Add CUF allocator --- flang/CMakeLists.txt | 7 ++ flang/include/flang/Runtime/CUDA/allocator.h | 43 +++++++++ flang/runtime/CMakeLists.txt | 3 + flang/runtime/CUDA/CMakeLists.txt | 18 ++++ flang/runtime/CUDA/allocator.cpp | 62 +++++++++++++ flang/unittests/Runtime/CMakeLists.txt | 2 + flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 87 +++++++++++++++++++ flang/unittests/Runtime/CUDA/CMakeLists.txt | 15 ++++ 8 files changed, 237 insertions(+) create mode 100644 flang/include/flang/Runtime/CUDA/allocator.h create mode 100644 flang/runtime/CUDA/CMakeLists.txt create mode 100644 flang/runtime/CUDA/allocator.cpp create mode 100644 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp create mode 100644 flang/unittests/Runtime/CUDA/CMakeLists.txt diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 070c39eb6e9ab..971e5d5c93f23 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS if (FLANG_BUILD_TOOLS) add_subdirectory(tools) endif() + +option(FLANG_CUF_RUNTIME + "Compile CUDA Fortran runtime sources" OFF) +if (FLANG_CUF_RUNTIME) + find_package(CUDAToolkit REQUIRED) +endif() + add_subdirectory(runtime) if (LLVM_INCLUDE_EXAMPLES) diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h new file mode 100644 index 0000000000000..0738d1e3a8bf3 --- /dev/null +++ b/flang/include/flang/Runtime/CUDA/allocator.h @@ -0,0 +1,43 @@ +//===-- include/flang/Runtime/CUDA/allocator.h ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_ +#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_ + +#include "flang/Runtime/descriptor.h" + +static constexpr unsigned kPinnedAllocatorPos = 1; +static constexpr unsigned kDeviceAllocatorPos = 2; +static constexpr unsigned kManagedAllocatorPos = 3; + +#define CUDA_REPORT_IF_ERROR(expr) \ + [](CUresult result) { \ + if (!result) \ + return; \ + const char *name = nullptr; \ + cuGetErrorName(result, &name); \ + if (!name) \ + name = "<unknown>"; \ + fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \ + }(expr) + +namespace Fortran::runtime::cuf { + +void CUFRegisterAllocator(); + +void *CUFAllocPinned(std::size_t); +void CUFFreePinned(void *); + +void *CUFAllocDevice(std::size_t); +void CUFFreeDevice(void *); + +void *CUFAllocManaged(std::size_t); +void CUFFreeManaged(void *); + +} // namespace Fortran::runtime::cuf +#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_ diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt index 1f3ae23dcbf12..4537b2d059d65 100644 --- a/flang/runtime/CMakeLists.txt +++ b/flang/runtime/CMakeLists.txt @@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files) add_dependencies(FortranRuntime flang-new module_files) endif() +if (FLANG_CUF_RUNTIME) + add_subdirectory(CUDA) +endif() diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt new file mode 100644 index 0000000000000..e963b6062abc4 --- /dev/null +++ b/flang/runtime/CUDA/CMakeLists.txt @@ -0,0 +1,18 @@ +#===-- runtime/CUDA/CMakeLists.txt -----------------------------------------===# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===------------------------------------------------------------------------===# + +include_directories(${CUDAToolkit_INCLUDE_DIRS}) +find_library(CUDA_RUNTIME_LIBRARY cuda HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED) + +add_flang_library(CufRuntime + allocator.cpp +) +target_link_libraries(CufRuntime +PRIVATE +${CUDA_RUNTIME_LIBRARY} +) diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp new file mode 100644 index 0000000000000..3c913e344335b --- /dev/null +++ b/flang/runtime/CUDA/allocator.cpp @@ -0,0 +1,62 @@ +//===-- runtime/CUDA/allocator.cpp ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Runtime/CUDA/allocator.h" +#include "../allocator-registry.h" +#include "../derived.h" +#include "../stat.h" +#include "../terminator.h" +#include "../type-info.h" +#include "flang/Common/Fortran.h" +#include "flang/ISO_Fortran_binding_wrapper.h" + +#include "cuda.h" + +namespace Fortran::runtime::cuf { + +void CUFRegisterAllocator() { + allocatorRegistry.Register( + kPinnedAllocatorPos, {&CUFAllocPinned, CUFFreePinned}); + allocatorRegistry.Register( + kDeviceAllocatorPos, {&CUFAllocDevice, CUFFreeDevice}); + allocatorRegistry.Register( + kManagedAllocatorPos, {&CUFAllocManaged, CUFFreeManaged}); +} + +void *CUFAllocPinned(std::size_t sizeInBytes) { + void *p; + CUDA_REPORT_IF_ERROR(cuMemAllocHost(&p, sizeInBytes)); + return p; +} + +void CUFFreePinned(void *p) { + CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p))); +} + +void *CUFAllocDevice(std::size_t sizeInBytes) { + CUdeviceptr p = 0; + CUDA_REPORT_IF_ERROR(cuMemAlloc(&p, sizeInBytes)); + return reinterpret_cast<void *>(p); +} + +void CUFFreeDevice(void *p) { + CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p))); +} + +void *CUFAllocManaged(std::size_t sizeInBytes) { + CUdeviceptr p = 0; + CUDA_REPORT_IF_ERROR( + cuMemAllocManaged(&p, sizeInBytes, CU_MEM_ATTACH_GLOBAL)); + return reinterpret_cast<void *>(p); +} + +void CUFFreeManaged(void *p) { + CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p))); +} + +} // namespace Fortran::runtime::cuf diff --git a/flang/unittests/Runtime/CMakeLists.txt b/flang/unittests/Runtime/CMakeLists.txt index ed047b08ada35..2c3f8c1a9e9ac 100644 --- a/flang/unittests/Runtime/CMakeLists.txt +++ b/flang/unittests/Runtime/CMakeLists.txt @@ -35,3 +35,5 @@ target_link_libraries(FlangRuntimeTests PRIVATE FortranRuntime ) + +add_subdirectory(CUDA) diff --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp new file mode 100644 index 0000000000000..204826d3f2a96 --- /dev/null +++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp @@ -0,0 +1,87 @@ +//===-- flang/unittests/Runtime/AllocatableCUF.cpp ---------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" +#include "flang/Common/Fortran.h" +#include "flang/Runtime/CUDA/allocator.h" +#include "flang/Runtime/allocatable.h" + +#include "cuda.h" + +using namespace Fortran::runtime; + +static OwningPtr<Descriptor> createAllocatable( + Fortran::common::TypeCategory tc, int kind, int rank = 1) { + return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr, + CFI_attribute_allocatable); +} + +thread_local static int32_t defaultDevice = 0; + +CUdevice getDefaultCuDevice() { + CUdevice device; + CUDA_REPORT_IF_ERROR(cuDeviceGet(&device, /*ordinal=*/defaultDevice)); + return device; +} + +class ScopedContext { +public: + ScopedContext() { + // Static reference to CUDA primary context for device ordinal + // defaultDevice. + static CUcontext context = [] { + CUDA_REPORT_IF_ERROR(cuInit(/*flags=*/0)); + CUcontext ctx; + // Note: this does not affect the current context. + CUDA_REPORT_IF_ERROR( + cuDevicePrimaryCtxRetain(&ctx, getDefaultCuDevice())); + return ctx; + }(); + + CUDA_REPORT_IF_ERROR(cuCtxPushCurrent(context)); + } + + ~ScopedContext() { CUDA_REPORT_IF_ERROR(cuCtxPopCurrent(nullptr)); } +}; + +TEST(AllocatableCUFTest, SimpleDeviceAllocate) { + using Fortran::common::TypeCategory; + Fortran::runtime::cuf::CUFRegisterAllocator(); + ScopedContext ctx; + // REAL(4), DEVICE, ALLOCATABLE :: a(:) + auto a{createAllocatable(TypeCategory::Real, 4)}; + a->raw().SetAllocIdx(kDeviceAllocatorPos); + EXPECT_EQ((int)kDeviceAllocatorPos, a->raw().GetAllocIdx()); + EXPECT_FALSE(a->raw().HasAddendum()); + RTNAME(AllocatableSetBounds)(*a, 0, 1, 10); + RTNAME(AllocatableAllocate) + (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); + EXPECT_TRUE(a->IsAllocated()); + RTNAME(AllocatableDeallocate) + (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); + EXPECT_FALSE(a->IsAllocated()); +} + +TEST(AllocatableCUFTest, SimplePinnedAllocate) { + using Fortran::common::TypeCategory; + Fortran::runtime::cuf::CUFRegisterAllocator(); + ScopedContext ctx; + // INTEGER(4), PINNED, ALLOCATABLE :: a(:) + auto a{createAllocatable(TypeCategory::Integer, 4)}; + EXPECT_FALSE(a->raw().HasAddendum()); + a->raw().SetAllocIdx(kPinnedAllocatorPos); + EXPECT_EQ((int)kPinnedAllocatorPos, a->raw().GetAllocIdx()); + EXPECT_FALSE(a->raw().HasAddendum()); + RTNAME(AllocatableSetBounds)(*a, 0, 1, 10); + RTNAME(AllocatableAllocate) + (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); + EXPECT_TRUE(a->IsAllocated()); + RTNAME(AllocatableDeallocate) + (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); + EXPECT_FALSE(a->IsAllocated()); +} diff --git a/flang/unittests/Runtime/CUDA/CMakeLists.txt b/flang/unittests/Runtime/CUDA/CMakeLists.txt new file mode 100644 index 0000000000000..14b5c788719b8 --- /dev/null +++ b/flang/unittests/Runtime/CUDA/CMakeLists.txt @@ -0,0 +1,15 @@ +if (FLANG_CUF_RUNTIME) + +add_flang_unittest(FlangCufRuntimeTests + AllocatorCUF.cpp +) + +target_link_libraries(FlangCufRuntimeTests + PRIVATE + CufRuntime + FortranRuntime +) + +target_include_directories(FlangCufRuntimeTests PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) + +endif() >From 86ce320d39a6ea0fd25fad592453bdc2033e6103 Mon Sep 17 00:00:00 2001 From: Valentin Clement <clement...@gmail.com> Date: Tue, 30 Jul 2024 13:48:42 -0700 Subject: [PATCH 2/4] Use Terminator and switch pinned deallocator to cuFreeMemHost --- flang/include/flang/Runtime/CUDA/allocator.h | 3 ++- flang/runtime/CUDA/allocator.cpp | 4 ++-- flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h index 0738d1e3a8bf3..9f6fb55bea744 100644 --- a/flang/include/flang/Runtime/CUDA/allocator.h +++ b/flang/include/flang/Runtime/CUDA/allocator.h @@ -23,7 +23,8 @@ static constexpr unsigned kManagedAllocatorPos = 3; cuGetErrorName(result, &name); \ if (!name) \ name = "<unknown>"; \ - fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \ + Terminator terminator{__FILE__, __LINE__}; \ + terminator.Crash("'%s' failed with '%s'", #expr, name); \ }(expr) namespace Fortran::runtime::cuf { diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp index 3c913e344335b..899532a1a5e1c 100644 --- a/flang/runtime/CUDA/allocator.cpp +++ b/flang/runtime/CUDA/allocator.cpp @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// +#include "../terminator.h" #include "flang/Runtime/CUDA/allocator.h" #include "../allocator-registry.h" #include "../derived.h" #include "../stat.h" -#include "../terminator.h" #include "../type-info.h" #include "flang/Common/Fortran.h" #include "flang/ISO_Fortran_binding_wrapper.h" @@ -35,7 +35,7 @@ void *CUFAllocPinned(std::size_t sizeInBytes) { } void CUFFreePinned(void *p) { - CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p))); + CUDA_REPORT_IF_ERROR(cuMemFreeHost(p)); } void *CUFAllocDevice(std::size_t sizeInBytes) { diff --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp index 204826d3f2a96..caa62be6aa921 100644 --- a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp +++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp @@ -10,6 +10,7 @@ #include "flang/Common/Fortran.h" #include "flang/Runtime/CUDA/allocator.h" #include "flang/Runtime/allocatable.h" +#include "../../../runtime/terminator.h" #include "cuda.h" >From 9464c6e019358b7eac56ad705c6cf0cbd3287610 Mon Sep 17 00:00:00 2001 From: Valentin Clement <clement...@gmail.com> Date: Tue, 30 Jul 2024 13:49:47 -0700 Subject: [PATCH 3/4] clang-format --- flang/runtime/CUDA/allocator.cpp | 6 ++---- flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp index 899532a1a5e1c..02eaba5636990 100644 --- a/flang/runtime/CUDA/allocator.cpp +++ b/flang/runtime/CUDA/allocator.cpp @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "../terminator.h" #include "flang/Runtime/CUDA/allocator.h" #include "../allocator-registry.h" #include "../derived.h" #include "../stat.h" +#include "../terminator.h" #include "../type-info.h" #include "flang/Common/Fortran.h" #include "flang/ISO_Fortran_binding_wrapper.h" @@ -34,9 +34,7 @@ void *CUFAllocPinned(std::size_t sizeInBytes) { return p; } -void CUFFreePinned(void *p) { - CUDA_REPORT_IF_ERROR(cuMemFreeHost(p)); -} +void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cuMemFreeHost(p)); } void *CUFAllocDevice(std::size_t sizeInBytes) { CUdeviceptr p = 0; diff --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp index caa62be6aa921..c267ec9203569 100644 --- a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp +++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp @@ -7,10 +7,10 @@ //===----------------------------------------------------------------------===// #include "gtest/gtest.h" +#include "../../../runtime/terminator.h" #include "flang/Common/Fortran.h" #include "flang/Runtime/CUDA/allocator.h" #include "flang/Runtime/allocatable.h" -#include "../../../runtime/terminator.h" #include "cuda.h" >From e544123e4d09cc7283bc04460543145dacbeb570 Mon Sep 17 00:00:00 2001 From: Valentin Clement <clement...@gmail.com> Date: Tue, 30 Jul 2024 14:05:19 -0700 Subject: [PATCH 4/4] Add FortranRuntime dependency --- flang/runtime/CUDA/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt index e963b6062abc4..de1104f07ce6c 100644 --- a/flang/runtime/CUDA/CMakeLists.txt +++ b/flang/runtime/CUDA/CMakeLists.txt @@ -13,6 +13,7 @@ add_flang_library(CufRuntime allocator.cpp ) target_link_libraries(CufRuntime -PRIVATE -${CUDA_RUNTIME_LIBRARY} + PRIVATE + FortranRuntime + ${CUDA_RUNTIME_LIBRARY} ) _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits