[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval updated 
https://github.com/llvm/llvm-project/pull/101216

>From 825e6efbbe20041b2b1591617f32abc12a0b42ff Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Fri, 12 Jul 2024 15:20:12 -0700
Subject: [PATCH 1/4] [flang][cuda] Add CUF allocator

---
 flang/CMakeLists.txt  |  7 ++
 flang/include/flang/Runtime/CUDA/allocator.h  | 43 +
 flang/runtime/CMakeLists.txt  |  3 +
 flang/runtime/CUDA/CMakeLists.txt | 18 
 flang/runtime/CUDA/allocator.cpp  | 62 +
 flang/unittests/Runtime/CMakeLists.txt|  2 +
 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 87 +++
 flang/unittests/Runtime/CUDA/CMakeLists.txt   | 15 
 8 files changed, 237 insertions(+)
 create mode 100644 flang/include/flang/Runtime/CUDA/allocator.h
 create mode 100644 flang/runtime/CUDA/CMakeLists.txt
 create mode 100644 flang/runtime/CUDA/allocator.cpp
 create mode 100644 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
 create mode 100644 flang/unittests/Runtime/CUDA/CMakeLists.txt

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 070c39eb6e9ab..971e5d5c93f23 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS
 if (FLANG_BUILD_TOOLS)
   add_subdirectory(tools)
 endif()
+
+option(FLANG_CUF_RUNTIME
+  "Compile CUDA Fortran runtime sources" OFF)
+if (FLANG_CUF_RUNTIME)
+  find_package(CUDAToolkit REQUIRED)
+endif()
+
 add_subdirectory(runtime)
 
 if (LLVM_INCLUDE_EXAMPLES)
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h 
b/flang/include/flang/Runtime/CUDA/allocator.h
new file mode 100644
index 0..0738d1e3a8bf3
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -0,0 +1,43 @@
+//===-- include/flang/Runtime/CUDA/allocator.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+
+#include "flang/Runtime/descriptor.h"
+
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
+#define CUDA_REPORT_IF_ERROR(expr) \
+  [](CUresult result) { \
+if (!result) \
+  return; \
+const char *name = nullptr; \
+cuGetErrorName(result, ); \
+if (!name) \
+  name = ""; \
+fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \
+  }(expr)
+
+namespace Fortran::runtime::cuf {
+
+void CUFRegisterAllocator();
+
+void *CUFAllocPinned(std::size_t);
+void CUFFreePinned(void *);
+
+void *CUFAllocDevice(std::size_t);
+void CUFFreeDevice(void *);
+
+void *CUFAllocManaged(std::size_t);
+void CUFFreeManaged(void *);
+
+} // namespace Fortran::runtime::cuf
+#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 1f3ae23dcbf12..4537b2d059d65 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files)
   add_dependencies(FortranRuntime flang-new module_files)
 endif()
 
+if (FLANG_CUF_RUNTIME)
+  add_subdirectory(CUDA)
+endif()
diff --git a/flang/runtime/CUDA/CMakeLists.txt 
b/flang/runtime/CUDA/CMakeLists.txt
new file mode 100644
index 0..e963b6062abc4
--- /dev/null
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,18 @@
+#===-- runtime/CUDA/CMakeLists.txt 
-===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#======#
+
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+find_library(CUDA_RUNTIME_LIBRARY cuda HINTS 
${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
+
+add_flang_library(CufRuntime
+  allocator.cpp
+)
+target_link_libraries(CufRuntime
+PRIVATE
+${CUDA_RUNTIME_LIBRARY}
+)
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
new file mode 100644
index 0..3c913e344335b
--- /dev/null
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -0,0 +1,62 @@
+//===-- runtime/CUDA/allocator.cpp 
===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "flang/Runtime/CUDA/allocator.h"
+#include 

[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits


@@ -0,0 +1,18 @@
+#===-- runtime/CUDA/CMakeLists.txt 
-===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#======#
+
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+find_library(CUDA_RUNTIME_LIBRARY cuda HINTS 
${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
+
+add_flang_library(CufRuntime
+  allocator.cpp
+)
+target_link_libraries(CufRuntime
+PRIVATE
+${CUDA_RUNTIME_LIBRARY}

clementval wrote:

Yeah that would make sense!

https://github.com/llvm/llvm-project/pull/101216
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval updated 
https://github.com/llvm/llvm-project/pull/101216

>From 825e6efbbe20041b2b1591617f32abc12a0b42ff Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Fri, 12 Jul 2024 15:20:12 -0700
Subject: [PATCH 1/3] [flang][cuda] Add CUF allocator

---
 flang/CMakeLists.txt  |  7 ++
 flang/include/flang/Runtime/CUDA/allocator.h  | 43 +
 flang/runtime/CMakeLists.txt  |  3 +
 flang/runtime/CUDA/CMakeLists.txt | 18 
 flang/runtime/CUDA/allocator.cpp  | 62 +
 flang/unittests/Runtime/CMakeLists.txt|  2 +
 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 87 +++
 flang/unittests/Runtime/CUDA/CMakeLists.txt   | 15 
 8 files changed, 237 insertions(+)
 create mode 100644 flang/include/flang/Runtime/CUDA/allocator.h
 create mode 100644 flang/runtime/CUDA/CMakeLists.txt
 create mode 100644 flang/runtime/CUDA/allocator.cpp
 create mode 100644 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
 create mode 100644 flang/unittests/Runtime/CUDA/CMakeLists.txt

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 070c39eb6e9ab..971e5d5c93f23 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS
 if (FLANG_BUILD_TOOLS)
   add_subdirectory(tools)
 endif()
+
+option(FLANG_CUF_RUNTIME
+  "Compile CUDA Fortran runtime sources" OFF)
+if (FLANG_CUF_RUNTIME)
+  find_package(CUDAToolkit REQUIRED)
+endif()
+
 add_subdirectory(runtime)
 
 if (LLVM_INCLUDE_EXAMPLES)
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h 
b/flang/include/flang/Runtime/CUDA/allocator.h
new file mode 100644
index 0..0738d1e3a8bf3
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -0,0 +1,43 @@
+//===-- include/flang/Runtime/CUDA/allocator.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+
+#include "flang/Runtime/descriptor.h"
+
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
+#define CUDA_REPORT_IF_ERROR(expr) \
+  [](CUresult result) { \
+if (!result) \
+  return; \
+const char *name = nullptr; \
+cuGetErrorName(result, ); \
+if (!name) \
+  name = ""; \
+fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \
+  }(expr)
+
+namespace Fortran::runtime::cuf {
+
+void CUFRegisterAllocator();
+
+void *CUFAllocPinned(std::size_t);
+void CUFFreePinned(void *);
+
+void *CUFAllocDevice(std::size_t);
+void CUFFreeDevice(void *);
+
+void *CUFAllocManaged(std::size_t);
+void CUFFreeManaged(void *);
+
+} // namespace Fortran::runtime::cuf
+#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 1f3ae23dcbf12..4537b2d059d65 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files)
   add_dependencies(FortranRuntime flang-new module_files)
 endif()
 
+if (FLANG_CUF_RUNTIME)
+  add_subdirectory(CUDA)
+endif()
diff --git a/flang/runtime/CUDA/CMakeLists.txt 
b/flang/runtime/CUDA/CMakeLists.txt
new file mode 100644
index 0..e963b6062abc4
--- /dev/null
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,18 @@
+#===-- runtime/CUDA/CMakeLists.txt 
-===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#======#
+
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+find_library(CUDA_RUNTIME_LIBRARY cuda HINTS 
${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
+
+add_flang_library(CufRuntime
+  allocator.cpp
+)
+target_link_libraries(CufRuntime
+PRIVATE
+${CUDA_RUNTIME_LIBRARY}
+)
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
new file mode 100644
index 0..3c913e344335b
--- /dev/null
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -0,0 +1,62 @@
+//===-- runtime/CUDA/allocator.cpp 
===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "flang/Runtime/CUDA/allocator.h"
+#include 

[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval updated 
https://github.com/llvm/llvm-project/pull/101216

>From 825e6efbbe20041b2b1591617f32abc12a0b42ff Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Fri, 12 Jul 2024 15:20:12 -0700
Subject: [PATCH 1/2] [flang][cuda] Add CUF allocator

---
 flang/CMakeLists.txt  |  7 ++
 flang/include/flang/Runtime/CUDA/allocator.h  | 43 +
 flang/runtime/CMakeLists.txt  |  3 +
 flang/runtime/CUDA/CMakeLists.txt | 18 
 flang/runtime/CUDA/allocator.cpp  | 62 +
 flang/unittests/Runtime/CMakeLists.txt|  2 +
 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 87 +++
 flang/unittests/Runtime/CUDA/CMakeLists.txt   | 15 
 8 files changed, 237 insertions(+)
 create mode 100644 flang/include/flang/Runtime/CUDA/allocator.h
 create mode 100644 flang/runtime/CUDA/CMakeLists.txt
 create mode 100644 flang/runtime/CUDA/allocator.cpp
 create mode 100644 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
 create mode 100644 flang/unittests/Runtime/CUDA/CMakeLists.txt

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 070c39eb6e9ab..971e5d5c93f23 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS
 if (FLANG_BUILD_TOOLS)
   add_subdirectory(tools)
 endif()
+
+option(FLANG_CUF_RUNTIME
+  "Compile CUDA Fortran runtime sources" OFF)
+if (FLANG_CUF_RUNTIME)
+  find_package(CUDAToolkit REQUIRED)
+endif()
+
 add_subdirectory(runtime)
 
 if (LLVM_INCLUDE_EXAMPLES)
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h 
b/flang/include/flang/Runtime/CUDA/allocator.h
new file mode 100644
index 0..0738d1e3a8bf3
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -0,0 +1,43 @@
+//===-- include/flang/Runtime/CUDA/allocator.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+
+#include "flang/Runtime/descriptor.h"
+
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
+#define CUDA_REPORT_IF_ERROR(expr) \
+  [](CUresult result) { \
+if (!result) \
+  return; \
+const char *name = nullptr; \
+cuGetErrorName(result, ); \
+if (!name) \
+  name = ""; \
+fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \
+  }(expr)
+
+namespace Fortran::runtime::cuf {
+
+void CUFRegisterAllocator();
+
+void *CUFAllocPinned(std::size_t);
+void CUFFreePinned(void *);
+
+void *CUFAllocDevice(std::size_t);
+void CUFFreeDevice(void *);
+
+void *CUFAllocManaged(std::size_t);
+void CUFFreeManaged(void *);
+
+} // namespace Fortran::runtime::cuf
+#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 1f3ae23dcbf12..4537b2d059d65 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files)
   add_dependencies(FortranRuntime flang-new module_files)
 endif()
 
+if (FLANG_CUF_RUNTIME)
+  add_subdirectory(CUDA)
+endif()
diff --git a/flang/runtime/CUDA/CMakeLists.txt 
b/flang/runtime/CUDA/CMakeLists.txt
new file mode 100644
index 0..e963b6062abc4
--- /dev/null
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,18 @@
+#===-- runtime/CUDA/CMakeLists.txt 
-===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#======#
+
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+find_library(CUDA_RUNTIME_LIBRARY cuda HINTS 
${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
+
+add_flang_library(CufRuntime
+  allocator.cpp
+)
+target_link_libraries(CufRuntime
+PRIVATE
+${CUDA_RUNTIME_LIBRARY}
+)
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
new file mode 100644
index 0..3c913e344335b
--- /dev/null
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -0,0 +1,62 @@
+//===-- runtime/CUDA/allocator.cpp 
===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "flang/Runtime/CUDA/allocator.h"
+#include 

[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits


@@ -0,0 +1,43 @@
+//===-- include/flang/Runtime/CUDA/allocator.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+
+#include "flang/Runtime/descriptor.h"
+
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
+#define CUDA_REPORT_IF_ERROR(expr) \
+  [](CUresult result) { \
+if (!result) \
+  return; \
+const char *name = nullptr; \
+cuGetErrorName(result, ); \
+if (!name) \
+  name = ""; \
+fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \

clementval wrote:

I will update that to use the terminator. 

We don't have the source code information at the point where we call the 
allocator (`Descriptor::Allocate()`). I'll look into updating that in a 
follow-up patch, if that's OK with you. 

https://github.com/llvm/llvm-project/pull/101216
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Add allocator_idx attribute on fir.embox and fircg.ext_embox (PR #101212)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits


@@ -103,3 +103,27 @@ func.func @fir_dev_issue_1416(%arg0: 
!fir.ref>, %low: index
 fir.call @do_something(%3) : (!fir.box>) -> ()
 return
 }
+
+// CHECK-LABEL: define void @_QPtest_allocator1()
+func.func @_QPtest_allocator1() {
+  %c20 = arith.constant 20 : index
+  %0 = fir.alloca !fir.array<20xi32> {bindc_name = "x", uniq_name = 
"_QFtest_sliceEx"}
+  %1 = fir.shape %c20 : (index) -> !fir.shape<1>
+  %3 = fir.embox %0(%1) {allocator_idx = 1 : i32} : 
(!fir.ref>, !fir.shape<1>) -> !fir.box>
+  fir.call @_QPtest_callee(%3) : (!fir.box>) -> ()
+  return
+}
+
+// %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } { 
ptr undef, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64), i32 
20240719, i8 1, i8 9, i8 0, i8 2, [1 x [3 x i64]] [[3 x i64] [i64 1, i64 20, 
i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64)]] }

clementval wrote:

Yeah! Thanks for catching this. 

https://github.com/llvm/llvm-project/pull/101212
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Add allocator_idx attribute on fir.embox and fircg.ext_embox (PR #101212)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval updated 
https://github.com/llvm/llvm-project/pull/101212

>From 77727fdf40e1164d9975378bb6951bc49baaf04a Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Mon, 29 Jul 2024 14:50:05 -0700
Subject: [PATCH 1/2] [flang] Add allocator_idx attribute on fir.embox and
 fircg.ext_embox

---
 .../include/flang/Optimizer/CodeGen/CGOps.td  |  4 ++-
 .../include/flang/Optimizer/Dialect/FIROps.td | 10 +--
 .../flang/Runtime}/allocator-registry.h   |  6 +++-
 flang/lib/Optimizer/CodeGen/CodeGen.cpp   | 28 ++-
 flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp  |  4 +--
 flang/runtime/allocator-registry.cpp  |  2 +-
 flang/runtime/descriptor.cpp  |  2 +-
 flang/test/Fir/embox.fir  | 24 
 8 files changed, 64 insertions(+), 16 deletions(-)
 rename flang/{runtime => include/flang/Runtime}/allocator-registry.h (90%)

diff --git a/flang/include/flang/Optimizer/CodeGen/CGOps.td 
b/flang/include/flang/Optimizer/CodeGen/CGOps.td
index f4740a263ffd2..34c5dc07284f0 100644
--- a/flang/include/flang/Optimizer/CodeGen/CGOps.td
+++ b/flang/include/flang/Optimizer/CodeGen/CGOps.td
@@ -48,6 +48,7 @@ def fircg_XEmboxOp : fircg_Op<"ext_embox", 
[AttrSizedOperandSegments]> {
- substring: A substring operator (offset, length) for CHARACTER.
- LEN type parameters: A vector of runtime LEN type parameters that
  describe an correspond to the elemental derived type.
+   - allocator_idx: specify special allocator to use.
 
 The memref and shape arguments are mandatory. The rest are optional.
   }];
@@ -60,7 +61,8 @@ def fircg_XEmboxOp : fircg_Op<"ext_embox", 
[AttrSizedOperandSegments]> {
 Variadic:$subcomponent,
 Variadic:$substr,
 Variadic:$lenParams,
-Optional:$sourceBox
+Optional:$sourceBox,
+OptionalAttr:$allocator_idx
   );
   let results = (outs BoxOrClassType);
 
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td 
b/flang/include/flang/Optimizer/Dialect/FIROps.td
index bee8e8f603ce3..7856fa7d90184 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -783,6 +783,7 @@ def fir_EmboxOp : fir_Op<"embox", [NoMemoryEffect, 
AttrSizedOperandSegments]> {
 - slice: an array section can be described with a slice triple,
 - typeparams: for emboxing a derived type with LEN type parameters,
 - accessMap: unused/experimental.
+- allocator_idx: specify special allocator to use.
   }];
 
   let arguments = (ins
@@ -791,7 +792,8 @@ def fir_EmboxOp : fir_Op<"embox", [NoMemoryEffect, 
AttrSizedOperandSegments]> {
 Optional:$slice,
 Variadic:$typeparams,
 Optional:$sourceBox,
-OptionalAttr:$accessMap
+OptionalAttr:$accessMap,
+OptionalAttr:$allocator_idx
   );
 
   let results = (outs BoxOrClassType);
@@ -801,9 +803,11 @@ def fir_EmboxOp : fir_Op<"embox", [NoMemoryEffect, 
AttrSizedOperandSegments]> {
   "mlir::Value":$memref, CArg<"mlir::Value", "{}">:$shape,
   CArg<"mlir::Value", "{}">:$slice,
   CArg<"mlir::ValueRange", "{}">:$typeparams,
-  CArg<"mlir::Value", "{}">:$sourceBox),
+  CArg<"mlir::Value", "{}">:$sourceBox,
+  CArg<"mlir::IntegerAttr", "{}">:$allocator_idx),
 [{ return build($_builder, $_state, resultTypes, memref, shape, slice,
-typeparams, sourceBox, mlir::AffineMapAttr{}); }]>
+typeparams, sourceBox, mlir::AffineMapAttr{},
+allocator_idx); }]>
   ];
 
   let assemblyFormat = [{
diff --git a/flang/runtime/allocator-registry.h 
b/flang/include/flang/Runtime/allocator-registry.h
similarity index 90%
rename from flang/runtime/allocator-registry.h
rename to flang/include/flang/Runtime/allocator-registry.h
index 3243e1deab630..c481bec8e8e51 100644
--- a/flang/runtime/allocator-registry.h
+++ b/flang/include/flang/Runtime/allocator-registry.h
@@ -13,6 +13,8 @@
 #include 
 #include 
 
+static constexpr unsigned kDefaultAllocator = 0;
+
 #define MAX_ALLOCATOR 5
 
 namespace Fortran::runtime {
@@ -37,7 +39,9 @@ struct AllocatorRegistry {
   RT_API_ATTRS constexpr AllocatorRegistry()
   : allocators{{, }} {}
 #else
-  constexpr AllocatorRegistry() { allocators[0] = {::malloc, ::free}; 
};
+  constexpr AllocatorRegistry() {
+allocators[kDefaultAllocator] = {::malloc, ::free};
+  };
 #endif
   RT_API_ATTRS void Register(int, Allocator_t);
   RT_API_ATTRS AllocFct GetAllocator(int pos);
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp 
b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 4a98585c34c7d..412cc4f1a020c 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -23,6 +23,7 @@
 #include "flang/Optimizer/Support/InternalNames.h"
 #include "flang/Optimizer/Support/TypeCode.h"
 #include "flang/Optimizer/Support/Utils.h"
+#include "flang/Runtime/allocator-registry.h"
 #include 

[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval created 
https://github.com/llvm/llvm-project/pull/101216

Add allocators for CUDA Fortran allocation on the device. Three allocators are 
added for pinned, device, and managed/unified memory allocation. 
`CUFRegisterAllocator()` is called to register the allocators in the allocator 
registry added in #100690.


Since this requires CUDA, a CMake option `FLANG_CUF_RUNTIME` is added to 
conditionally build these allocators.

>From 825e6efbbe20041b2b1591617f32abc12a0b42ff Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Fri, 12 Jul 2024 15:20:12 -0700
Subject: [PATCH] [flang][cuda] Add CUF allocator

---
 flang/CMakeLists.txt  |  7 ++
 flang/include/flang/Runtime/CUDA/allocator.h  | 43 +
 flang/runtime/CMakeLists.txt  |  3 +
 flang/runtime/CUDA/CMakeLists.txt | 18 
 flang/runtime/CUDA/allocator.cpp  | 62 +
 flang/unittests/Runtime/CMakeLists.txt|  2 +
 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 87 +++
 flang/unittests/Runtime/CUDA/CMakeLists.txt   | 15 
 8 files changed, 237 insertions(+)
 create mode 100644 flang/include/flang/Runtime/CUDA/allocator.h
 create mode 100644 flang/runtime/CUDA/CMakeLists.txt
 create mode 100644 flang/runtime/CUDA/allocator.cpp
 create mode 100644 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
 create mode 100644 flang/unittests/Runtime/CUDA/CMakeLists.txt

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 070c39eb6e9ab..971e5d5c93f23 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS
 if (FLANG_BUILD_TOOLS)
   add_subdirectory(tools)
 endif()
+
+option(FLANG_CUF_RUNTIME
+  "Compile CUDA Fortran runtime sources" OFF)
+if (FLANG_CUF_RUNTIME)
+  find_package(CUDAToolkit REQUIRED)
+endif()
+
 add_subdirectory(runtime)
 
 if (LLVM_INCLUDE_EXAMPLES)
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h 
b/flang/include/flang/Runtime/CUDA/allocator.h
new file mode 100644
index 0..0738d1e3a8bf3
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -0,0 +1,43 @@
+//===-- include/flang/Runtime/CUDA/allocator.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+
+#include "flang/Runtime/descriptor.h"
+
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
+#define CUDA_REPORT_IF_ERROR(expr) \
+  [](CUresult result) { \
+if (!result) \
+  return; \
+const char *name = nullptr; \
+cuGetErrorName(result, ); \
+if (!name) \
+  name = ""; \
+fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \
+  }(expr)
+
+namespace Fortran::runtime::cuf {
+
+void CUFRegisterAllocator();
+
+void *CUFAllocPinned(std::size_t);
+void CUFFreePinned(void *);
+
+void *CUFAllocDevice(std::size_t);
+void CUFFreeDevice(void *);
+
+void *CUFAllocManaged(std::size_t);
+void CUFFreeManaged(void *);
+
+} // namespace Fortran::runtime::cuf
+#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 1f3ae23dcbf12..4537b2d059d65 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files)
   add_dependencies(FortranRuntime flang-new module_files)
 endif()
 
+if (FLANG_CUF_RUNTIME)
+  add_subdirectory(CUDA)
+endif()
diff --git a/flang/runtime/CUDA/CMakeLists.txt 
b/flang/runtime/CUDA/CMakeLists.txt
new file mode 100644
index 0..e963b6062abc4
--- /dev/null
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,18 @@
+#===-- runtime/CUDA/CMakeLists.txt 
-===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#======#
+
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+find_library(CUDA_RUNTIME_LIBRARY cuda HINTS 
${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
+
+add_flang_library(CufRuntime
+  allocator.cpp
+)
+target_link_libraries(CufRuntime
+PRIVATE
+${CUDA_RUNTIME_LIBRARY}
+)
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
new file mode 100644
index 0..3c913e344335b
--- /dev/null
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -0,0 +1,62 @@
+//===-- runtime/CUDA/allocator.cpp 
===//
+//
+// 

[llvm-branch-commits] [flang] [flang] Add allocator_idx attribute on fir.embox and fircg.ext_embox (PR #101212)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval created 
https://github.com/llvm/llvm-project/pull/101212

#100690 introduces an allocator registry with the ability to store an allocator 
index in the descriptor. This patch adds an attribute to `fir.embox` and 
`fircg.ext_embox` so that the allocator index can be set while populating the 
descriptor fields. 

>From 77727fdf40e1164d9975378bb6951bc49baaf04a Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Mon, 29 Jul 2024 14:50:05 -0700
Subject: [PATCH] [flang] Add allocator_idx attribute on fir.embox and
 fircg.ext_embox

---
 .../include/flang/Optimizer/CodeGen/CGOps.td  |  4 ++-
 .../include/flang/Optimizer/Dialect/FIROps.td | 10 +--
 .../flang/Runtime}/allocator-registry.h   |  6 +++-
 flang/lib/Optimizer/CodeGen/CodeGen.cpp   | 28 ++-
 flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp  |  4 +--
 flang/runtime/allocator-registry.cpp  |  2 +-
 flang/runtime/descriptor.cpp  |  2 +-
 flang/test/Fir/embox.fir  | 24 
 8 files changed, 64 insertions(+), 16 deletions(-)
 rename flang/{runtime => include/flang/Runtime}/allocator-registry.h (90%)

diff --git a/flang/include/flang/Optimizer/CodeGen/CGOps.td 
b/flang/include/flang/Optimizer/CodeGen/CGOps.td
index f4740a263ffd2..34c5dc07284f0 100644
--- a/flang/include/flang/Optimizer/CodeGen/CGOps.td
+++ b/flang/include/flang/Optimizer/CodeGen/CGOps.td
@@ -48,6 +48,7 @@ def fircg_XEmboxOp : fircg_Op<"ext_embox", 
[AttrSizedOperandSegments]> {
- substring: A substring operator (offset, length) for CHARACTER.
- LEN type parameters: A vector of runtime LEN type parameters that
  describe an correspond to the elemental derived type.
+   - allocator_idx: specify special allocator to use.
 
 The memref and shape arguments are mandatory. The rest are optional.
   }];
@@ -60,7 +61,8 @@ def fircg_XEmboxOp : fircg_Op<"ext_embox", 
[AttrSizedOperandSegments]> {
 Variadic:$subcomponent,
 Variadic:$substr,
 Variadic:$lenParams,
-Optional:$sourceBox
+Optional:$sourceBox,
+OptionalAttr:$allocator_idx
   );
   let results = (outs BoxOrClassType);
 
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td 
b/flang/include/flang/Optimizer/Dialect/FIROps.td
index bee8e8f603ce3..7856fa7d90184 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -783,6 +783,7 @@ def fir_EmboxOp : fir_Op<"embox", [NoMemoryEffect, 
AttrSizedOperandSegments]> {
 - slice: an array section can be described with a slice triple,
 - typeparams: for emboxing a derived type with LEN type parameters,
 - accessMap: unused/experimental.
+- allocator_idx: specify special allocator to use.
   }];
 
   let arguments = (ins
@@ -791,7 +792,8 @@ def fir_EmboxOp : fir_Op<"embox", [NoMemoryEffect, 
AttrSizedOperandSegments]> {
 Optional:$slice,
 Variadic:$typeparams,
 Optional:$sourceBox,
-OptionalAttr:$accessMap
+OptionalAttr:$accessMap,
+OptionalAttr:$allocator_idx
   );
 
   let results = (outs BoxOrClassType);
@@ -801,9 +803,11 @@ def fir_EmboxOp : fir_Op<"embox", [NoMemoryEffect, 
AttrSizedOperandSegments]> {
   "mlir::Value":$memref, CArg<"mlir::Value", "{}">:$shape,
   CArg<"mlir::Value", "{}">:$slice,
   CArg<"mlir::ValueRange", "{}">:$typeparams,
-  CArg<"mlir::Value", "{}">:$sourceBox),
+  CArg<"mlir::Value", "{}">:$sourceBox,
+  CArg<"mlir::IntegerAttr", "{}">:$allocator_idx),
 [{ return build($_builder, $_state, resultTypes, memref, shape, slice,
-typeparams, sourceBox, mlir::AffineMapAttr{}); }]>
+typeparams, sourceBox, mlir::AffineMapAttr{},
+allocator_idx); }]>
   ];
 
   let assemblyFormat = [{
diff --git a/flang/runtime/allocator-registry.h 
b/flang/include/flang/Runtime/allocator-registry.h
similarity index 90%
rename from flang/runtime/allocator-registry.h
rename to flang/include/flang/Runtime/allocator-registry.h
index 3243e1deab630..c481bec8e8e51 100644
--- a/flang/runtime/allocator-registry.h
+++ b/flang/include/flang/Runtime/allocator-registry.h
@@ -13,6 +13,8 @@
 #include 
 #include 
 
+static constexpr unsigned kDefaultAllocator = 0;
+
 #define MAX_ALLOCATOR 5
 
 namespace Fortran::runtime {
@@ -37,7 +39,9 @@ struct AllocatorRegistry {
   RT_API_ATTRS constexpr AllocatorRegistry()
   : allocators{{, }} {}
 #else
-  constexpr AllocatorRegistry() { allocators[0] = {::malloc, ::free}; 
};
+  constexpr AllocatorRegistry() {
+allocators[kDefaultAllocator] = {::malloc, ::free};
+  };
 #endif
   RT_API_ATTRS void Register(int, Allocator_t);
   RT_API_ATTRS AllocFct GetAllocator(int pos);
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp 
b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 4a98585c34c7d..412cc4f1a020c 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -23,6 

[llvm-branch-commits] [flang] [flang] Lower REDUCE intrinsic for reduction op with args by value (PR #95353)

2024-06-13 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits


@@ -5745,6 +5745,14 @@ IntrinsicLibrary::genReduce(mlir::Type resultType,
   int rank = arrayTmp.rank();
   assert(rank >= 1);
 
+  // Arguements to the reduction operation are passed by reference or value?
+  bool argByRef = true;
+  if (auto embox =
+  mlir::dyn_cast_or_null(operation.getDefiningOp())) 
{

clementval wrote:

> Does REDUCE works with dummy procedure and procedure pointers? If so it would 
> be good to add tests for those cases to ensure the pattern matching here 
> works with them.

I'll check whether this is supported and add a proper test if it is.

https://github.com/llvm/llvm-project/pull/95353
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Lower REDUCE intrinsic for reduction op with args by value (PR #95353)

2024-06-12 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval created 
https://github.com/llvm/llvm-project/pull/95353

#95297 updates the runtime entry points to distinguish between reduction 
operations with arguments passed by value and by reference. This patch adds 
lowering to support arguments passed by value.

>From defadc4f18b0b4b369a3657a0f6e4c9f79ffd793 Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Wed, 12 Jun 2024 15:28:31 -0700
Subject: [PATCH] [flang] Update lowering of REDUCE intrinsic for reduction
 operation with args by value

---
 .../Optimizer/Builder/Runtime/RTBuilder.h |  22 +
 .../Optimizer/Builder/Runtime/Reduction.h |   8 +-
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp |  16 +-
 .../Optimizer/Builder/Runtime/Reduction.cpp   | 468 --
 flang/test/Lower/Intrinsics/reduce.f90| 235 -
 5 files changed, 674 insertions(+), 75 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h 
b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
index 809d5b8d569dc..845ba385918d0 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
@@ -64,6 +64,18 @@ using FuncTypeBuilderFunc = mlir::FunctionType 
(*)(mlir::MLIRContext *);
 }; 
\
   }
 
+#define REDUCTION_VALUE_OPERATION_MODEL(T) 
\
+  template <>  
\
+  constexpr TypeBuilderFunc
\
+  getModel>() {   
\
+return [](mlir::MLIRContext *context) -> mlir::Type {  
\
+  TypeBuilderFunc f{getModel()};
\
+  auto refTy = fir::ReferenceType::get(f(context));
\
+  return mlir::FunctionType::get(context, {f(context), f(context)},
\
+ refTy);   
\
+}; 
\
+  }
+
 #define REDUCTION_CHAR_OPERATION_MODEL(T)  
\
   template <>  
\
   constexpr TypeBuilderFunc
\
@@ -481,17 +493,27 @@ constexpr TypeBuilderFunc getModel() {
 }
 
 REDUCTION_REF_OPERATION_MODEL(std::int8_t)
+REDUCTION_VALUE_OPERATION_MODEL(std::int8_t)
 REDUCTION_REF_OPERATION_MODEL(std::int16_t)
+REDUCTION_VALUE_OPERATION_MODEL(std::int16_t)
 REDUCTION_REF_OPERATION_MODEL(std::int32_t)
+REDUCTION_VALUE_OPERATION_MODEL(std::int32_t)
 REDUCTION_REF_OPERATION_MODEL(std::int64_t)
+REDUCTION_VALUE_OPERATION_MODEL(std::int64_t)
 REDUCTION_REF_OPERATION_MODEL(Fortran::common::int128_t)
+REDUCTION_VALUE_OPERATION_MODEL(Fortran::common::int128_t)
 
 REDUCTION_REF_OPERATION_MODEL(float)
+REDUCTION_VALUE_OPERATION_MODEL(float)
 REDUCTION_REF_OPERATION_MODEL(double)
+REDUCTION_VALUE_OPERATION_MODEL(double)
 REDUCTION_REF_OPERATION_MODEL(long double)
+REDUCTION_VALUE_OPERATION_MODEL(long double)
 
 REDUCTION_REF_OPERATION_MODEL(std::complex)
+REDUCTION_VALUE_OPERATION_MODEL(std::complex)
 REDUCTION_REF_OPERATION_MODEL(std::complex)
+REDUCTION_VALUE_OPERATION_MODEL(std::complex)
 
 REDUCTION_CHAR_OPERATION_MODEL(char)
 REDUCTION_CHAR_OPERATION_MODEL(char16_t)
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h 
b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
index fedf453a6dc8d..2a40cddc0cc2c 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
@@ -229,8 +229,8 @@ void genIParityDim(fir::FirOpBuilder , 
mlir::Location loc,
 /// result value. This is used for COMPLEX, CHARACTER and DERIVED TYPES.
 void genReduce(fir::FirOpBuilder , mlir::Location loc,
mlir::Value arrayBox, mlir::Value operation, mlir::Value 
maskBox,
-   mlir::Value identity, mlir::Value ordered,
-   mlir::Value resultBox);
+   mlir::Value identity, mlir::Value ordered, mlir::Value 
resultBox,
+   bool argByRef);
 
 /// Generate call to `Reduce` intrinsic runtime routine. This is the version
 /// that does not take a dim argument and return a scalare result. This is used
@@ -238,14 +238,14 @@ void genReduce(fir::FirOpBuilder , mlir::Location 
loc,
 mlir::Value genReduce(fir::FirOpBuilder , mlir::Location loc,
   mlir::Value arrayBox, mlir::Value operation,
   mlir::Value maskBox, mlir::Value identity,
-  mlir::Value ordered);
+  mlir::Value ordered, bool argByRef);
 
 /// Generate call to `Reduce` intrinsic runtime routine. This is the version
 /// that takes arrays of any rank with a dim argument specified.
 void 

[llvm-branch-commits] [flang] [flang] Lower REDUCE intrinsic with DIM argument (PR #94771)

2024-06-10 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval updated 
https://github.com/llvm/llvm-project/pull/94771

>From 87a01d458650f4c6fd9b0456acbca51094bd127b Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Thu, 6 Jun 2024 14:17:44 -0700
Subject: [PATCH] [flang] Lower REDUCE intrinsic with DIM argument

---
 .../Optimizer/Builder/Runtime/Reduction.h |   7 +
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp |  12 +-
 .../Optimizer/Builder/Runtime/Reduction.cpp   | 204 
 flang/test/Lower/Intrinsics/reduce.f90| 221 ++
 4 files changed, 443 insertions(+), 1 deletion(-)

diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h 
b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
index 27652208b524e..fedf453a6dc8d 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
@@ -240,6 +240,13 @@ mlir::Value genReduce(fir::FirOpBuilder , 
mlir::Location loc,
   mlir::Value maskBox, mlir::Value identity,
   mlir::Value ordered);
 
+/// Generate call to `Reduce` intrinsic runtime routine. This is the version
+/// that takes arrays of any rank with a dim argument specified.
+void genReduceDim(fir::FirOpBuilder , mlir::Location loc,
+  mlir::Value arrayBox, mlir::Value operation, mlir::Value dim,
+  mlir::Value maskBox, mlir::Value identity,
+  mlir::Value ordered, mlir::Value resultBox);
+
 } // namespace fir::runtime
 
 #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_REDUCTION_H
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp 
b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 0e29849a57688..e250a476b5802 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -5790,7 +5790,17 @@ IntrinsicLibrary::genReduce(mlir::Type resultType,
 return fir::runtime::genReduce(builder, loc, array, operation, mask,
identity, ordered);
   }
-  TODO(loc, "reduce with array result");
+  // Handle cases that have an array result.
+  // Create mutable fir.box to be passed to the runtime for the result.
+  mlir::Type resultArrayType = builder.getVarLenSeqTy(resultType, rank - 1);
+  fir::MutableBoxValue resultMutableBox =
+  fir::factory::createTempMutableBox(builder, loc, resultArrayType);
+  mlir::Value resultIrBox =
+  fir::factory::getMutableIRBox(builder, loc, resultMutableBox);
+  mlir::Value dim = fir::getBase(args[2]);
+  fir::runtime::genReduceDim(builder, loc, array, operation, dim, mask,
+ identity, ordered, resultIrBox);
+  return readAndAddCleanUp(resultMutableBox, resultType, "REDUCE");
 }
 
 // REPEAT
diff --git a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp 
b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
index a7cd53328d69a..9b035e6b4dd06 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
@@ -486,6 +486,50 @@ struct ForcedReduceReal16 {
   }
 };
 
+/// Placeholder for DIM real*10 version of Reduce Intrinsic
+struct ForcedReduceReal10Dim {
+  static constexpr const char *name =
+  ExpandAndQuoteKey(RTNAME(ReduceReal10Dim));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+return [](mlir::MLIRContext *ctx) {
+  auto ty = mlir::FloatType::getF80(ctx);
+  auto boxTy =
+  fir::runtime::getModel()(ctx);
+  auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+  auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+  auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+  auto refTy = fir::ReferenceType::get(ty);
+  auto refBoxTy = fir::ReferenceType::get(boxTy);
+  auto i1Ty = mlir::IntegerType::get(ctx, 1);
+  return mlir::FunctionType::get(
+  ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, 
i1Ty},
+  {});
+};
+  }
+};
+
+/// Placeholder for DIM real*16 version of Reduce Intrinsic
+struct ForcedReduceReal16Dim {
+  static constexpr const char *name =
+  ExpandAndQuoteKey(RTNAME(ReduceReal16Dim));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+return [](mlir::MLIRContext *ctx) {
+  auto ty = mlir::FloatType::getF128(ctx);
+  auto boxTy =
+  fir::runtime::getModel()(ctx);
+  auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+  auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+  auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+  auto refTy = fir::ReferenceType::get(ty);
+  auto refBoxTy = fir::ReferenceType::get(boxTy);
+  auto i1Ty = mlir::IntegerType::get(ctx, 1);
+  return mlir::FunctionType::get(
+  ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, 
i1Ty},
+  {});
+};
+  }
+};
+
 /// Placeholder for integer*16 version of 

[llvm-branch-commits] [flang] [flang] Lower REDUCE intrinsic with DIM argument (PR #94771)

2024-06-07 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits
Valentin Clement =?utf-8?b?KOODkOODrOODsw=?=,Valentin Clement
 ,Valentin Clement 
Message-ID:
In-Reply-To: 


https://github.com/clementval updated 
https://github.com/llvm/llvm-project/pull/94771

>From fd911977863888c7c005f00ae05049b32fe9d4d6 Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Thu, 6 Jun 2024 14:17:44 -0700
Subject: [PATCH 1/4] [flang] Lower REDUCE intrinsic with DIM argument

---
 .../Optimizer/Builder/Runtime/Reduction.h |   7 +
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp |  12 +-
 .../Optimizer/Builder/Runtime/Reduction.cpp   | 186 ++-
 flang/test/Lower/Intrinsics/reduce.f90| 221 ++
 4 files changed, 423 insertions(+), 3 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h 
b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
index 27652208b524e..fedf453a6dc8d 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
@@ -240,6 +240,13 @@ mlir::Value genReduce(fir::FirOpBuilder , 
mlir::Location loc,
   mlir::Value maskBox, mlir::Value identity,
   mlir::Value ordered);
 
+/// Generate call to `Reduce` intrinsic runtime routine. This is the version
+/// that takes arrays of any rank with a dim argument specified.
+void genReduceDim(fir::FirOpBuilder , mlir::Location loc,
+  mlir::Value arrayBox, mlir::Value operation, mlir::Value dim,
+  mlir::Value maskBox, mlir::Value identity,
+  mlir::Value ordered, mlir::Value resultBox);
+
 } // namespace fir::runtime
 
 #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_REDUCTION_H
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp 
b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 0e29849a57688..e250a476b5802 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -5790,7 +5790,17 @@ IntrinsicLibrary::genReduce(mlir::Type resultType,
 return fir::runtime::genReduce(builder, loc, array, operation, mask,
identity, ordered);
   }
-  TODO(loc, "reduce with array result");
+  // Handle cases that have an array result.
+  // Create mutable fir.box to be passed to the runtime for the result.
+  mlir::Type resultArrayType = builder.getVarLenSeqTy(resultType, rank - 1);
+  fir::MutableBoxValue resultMutableBox =
+  fir::factory::createTempMutableBox(builder, loc, resultArrayType);
+  mlir::Value resultIrBox =
+  fir::factory::getMutableIRBox(builder, loc, resultMutableBox);
+  mlir::Value dim = fir::getBase(args[2]);
+  fir::runtime::genReduceDim(builder, loc, array, operation, dim, mask,
+ identity, ordered, resultIrBox);
+  return readAndAddCleanUp(resultMutableBox, resultType, "REDUCE");
 }
 
 // REPEAT
diff --git a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp 
b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
index a7cd53328d69a..e83af63916dcd 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
@@ -486,6 +486,28 @@ struct ForcedReduceReal16 {
   }
 };
 
+/// Placeholder for DIM real*16 version of Reduce Intrinsic
+struct ForcedReduceReal16Dim {
+  static constexpr const char *name =
+  ExpandAndQuoteKey(RTNAME(ReduceReal16Dim));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+return [](mlir::MLIRContext *ctx) {
+  auto ty = mlir::FloatType::getF128(ctx);
+  auto boxTy =
+  fir::runtime::getModel()(ctx);
+  auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+  auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+  auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+  auto refTy = fir::ReferenceType::get(ty);
+  auto refBoxTy = fir::ReferenceType::get(boxTy);
+  auto i1Ty = mlir::IntegerType::get(ctx, 1);
+  return mlir::FunctionType::get(
+  ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, 
i1Ty},
+  {});
+};
+  }
+};
+
 /// Placeholder for integer*16 version of Reduce Intrinsic
 struct ForcedReduceInteger16 {
   static constexpr const char *name =
@@ -506,6 +528,28 @@ struct ForcedReduceInteger16 {
   }
 };
 
+/// Placeholder for DIM integer*16 version of Reduce Intrinsic
+struct ForcedReduceInteger16Dim {
+  static constexpr const char *name =
+  ExpandAndQuoteKey(RTNAME(ReduceInteger16Dim));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+return [](mlir::MLIRContext *ctx) {
+  auto ty = mlir::IntegerType::get(ctx, 128);
+  auto boxTy =
+  fir::runtime::getModel()(ctx);
+  auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+  auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+  auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+  auto refTy = 

[llvm-branch-commits] [flang] [flang] Lower REDUCE intrinsic with DIM argument (PR #94771)

2024-06-07 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits
Valentin Clement =?utf-8?b?KOODkOODrOODsw=?=,Valentin Clement
 
Message-ID:
In-Reply-To: 


clementval wrote:

> Beware that the windows failure seems related to this change or the previous 
> one: 
> 
> ```
> 
> C:\ws\src\flang\lib\Optimizer\Builder\Runtime\Reduction.cpp(1499): error 
> C2065: '_FortranAReduceReal10': undeclared identifier
> 
> ```
> 
> 
> 
> I think you need "Forced" definition for the REAL*10 too.

I'll look at that! Thanks

https://github.com/llvm/llvm-project/pull/94771
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Lower REDUCE intrinsic with DIM argument (PR #94771)

2024-06-07 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits
Valentin Clement =?utf-8?b?KOODkOODrOODsw=?=,Valentin Clement
 
Message-ID:
In-Reply-To: 


clementval wrote:

Thanks Slava and Jean for the review! I fixed the mismatch in 
`ForcedReduceComplex16` and `ForcedReduceComplex16Dim`. Thanks Slava for 
catching that. 

https://github.com/llvm/llvm-project/pull/94771
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Lower REDUCE intrinsic with DIM argument (PR #94771)

2024-06-07 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits
Valentin Clement =?utf-8?b?KOODkOODrOODsw=?=,Valentin Clement
 
Message-ID:
In-Reply-To: 


https://github.com/clementval updated 
https://github.com/llvm/llvm-project/pull/94771

>From fd911977863888c7c005f00ae05049b32fe9d4d6 Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Thu, 6 Jun 2024 14:17:44 -0700
Subject: [PATCH 1/3] [flang] Lower REDUCE intrinsic with DIM argument

---
 .../Optimizer/Builder/Runtime/Reduction.h |   7 +
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp |  12 +-
 .../Optimizer/Builder/Runtime/Reduction.cpp   | 186 ++-
 flang/test/Lower/Intrinsics/reduce.f90| 221 ++
 4 files changed, 423 insertions(+), 3 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h 
b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
index 27652208b524e..fedf453a6dc8d 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
@@ -240,6 +240,13 @@ mlir::Value genReduce(fir::FirOpBuilder , 
mlir::Location loc,
   mlir::Value maskBox, mlir::Value identity,
   mlir::Value ordered);
 
+/// Generate call to `Reduce` intrinsic runtime routine. This is the version
+/// that takes arrays of any rank with a dim argument specified.
+void genReduceDim(fir::FirOpBuilder , mlir::Location loc,
+  mlir::Value arrayBox, mlir::Value operation, mlir::Value dim,
+  mlir::Value maskBox, mlir::Value identity,
+  mlir::Value ordered, mlir::Value resultBox);
+
 } // namespace fir::runtime
 
 #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_REDUCTION_H
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp 
b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 0e29849a57688..e250a476b5802 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -5790,7 +5790,17 @@ IntrinsicLibrary::genReduce(mlir::Type resultType,
 return fir::runtime::genReduce(builder, loc, array, operation, mask,
identity, ordered);
   }
-  TODO(loc, "reduce with array result");
+  // Handle cases that have an array result.
+  // Create mutable fir.box to be passed to the runtime for the result.
+  mlir::Type resultArrayType = builder.getVarLenSeqTy(resultType, rank - 1);
+  fir::MutableBoxValue resultMutableBox =
+  fir::factory::createTempMutableBox(builder, loc, resultArrayType);
+  mlir::Value resultIrBox =
+  fir::factory::getMutableIRBox(builder, loc, resultMutableBox);
+  mlir::Value dim = fir::getBase(args[2]);
+  fir::runtime::genReduceDim(builder, loc, array, operation, dim, mask,
+ identity, ordered, resultIrBox);
+  return readAndAddCleanUp(resultMutableBox, resultType, "REDUCE");
 }
 
 // REPEAT
diff --git a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp 
b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
index a7cd53328d69a..e83af63916dcd 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
@@ -486,6 +486,28 @@ struct ForcedReduceReal16 {
   }
 };
 
+/// Placeholder for DIM real*16 version of Reduce Intrinsic
+struct ForcedReduceReal16Dim {
+  static constexpr const char *name =
+  ExpandAndQuoteKey(RTNAME(ReduceReal16Dim));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+return [](mlir::MLIRContext *ctx) {
+  auto ty = mlir::FloatType::getF128(ctx);
+  auto boxTy =
+  fir::runtime::getModel()(ctx);
+  auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+  auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+  auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+  auto refTy = fir::ReferenceType::get(ty);
+  auto refBoxTy = fir::ReferenceType::get(boxTy);
+  auto i1Ty = mlir::IntegerType::get(ctx, 1);
+  return mlir::FunctionType::get(
+  ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, 
i1Ty},
+  {});
+};
+  }
+};
+
 /// Placeholder for integer*16 version of Reduce Intrinsic
 struct ForcedReduceInteger16 {
   static constexpr const char *name =
@@ -506,6 +528,28 @@ struct ForcedReduceInteger16 {
   }
 };
 
+/// Placeholder for DIM integer*16 version of Reduce Intrinsic
+struct ForcedReduceInteger16Dim {
+  static constexpr const char *name =
+  ExpandAndQuoteKey(RTNAME(ReduceInteger16Dim));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+return [](mlir::MLIRContext *ctx) {
+  auto ty = mlir::IntegerType::get(ctx, 128);
+  auto boxTy =
+  fir::runtime::getModel()(ctx);
+  auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+  auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+  auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+  auto refTy = fir::ReferenceType::get(ty);
+

[llvm-branch-commits] [flang] [flang] Lower REDUCE intrinsic with DIM argument (PR #94771)

2024-06-07 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits
Valentin Clement =?utf-8?b?KOODkOODrOODsw=?Message-ID:
In-Reply-To: 



@@ -540,9 +606,31 @@ struct ForcedReduceComplex16 {
   auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
   auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
   auto refTy = fir::ReferenceType::get(ty);
+  auto refBoxTy = fir::ReferenceType::get(boxTy);

clementval wrote:

Yes, you are right. I mixed this one up with the DIM version. I'll revert 
it.

https://github.com/llvm/llvm-project/pull/94771
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Lower REDUCE intrinsic with DIM argument (PR #94771)

2024-06-07 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits
Valentin Clement =?utf-8?b?KOODkOODrOODsw=?Message-ID:
In-Reply-To: 



@@ -540,9 +606,31 @@ struct ForcedReduceComplex16 {
   auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
   auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
   auto refTy = fir::ReferenceType::get(ty);
+  auto refBoxTy = fir::ReferenceType::get(boxTy);
   auto i1Ty = mlir::IntegerType::get(ctx, 1);
   return mlir::FunctionType::get(
-  ctx, {refTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty},
+  ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, 
i1Ty},
+  {});
+};
+  }
+};
+
+/// Placeholder for Dim complex(16) version of Reduce Intrinsic
+struct ForcedReduceComplex16Dim {
+  static constexpr const char *name =
+  ExpandAndQuoteKey(RTNAME(CppReduceComplex16Dim));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+return [](mlir::MLIRContext *ctx) {
+  auto ty = mlir::ComplexType::get(mlir::FloatType::getF128(ctx));
+  auto boxTy =
+  fir::runtime::getModel()(ctx);
+  auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+  auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+  auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+  auto refTy = fir::ReferenceType::get(ty);
+  auto i1Ty = mlir::IntegerType::get(ctx, 1);
+  return mlir::FunctionType::get(
+  ctx, {boxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty},

clementval wrote:

Let me double-check that. I have probably messed something up here.

https://github.com/llvm/llvm-project/pull/94771
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Lower REDUCE intrinsic with DIM argument (PR #94771)

2024-06-07 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits
Valentin Clement =?utf-8?b?KOODkOODrOODsw=?Message-ID:
In-Reply-To: 



@@ -540,9 +606,31 @@ struct ForcedReduceComplex16 {
   auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
   auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
   auto refTy = fir::ReferenceType::get(ty);
+  auto refBoxTy = fir::ReferenceType::get(boxTy);

clementval wrote:

This is needed because it was actually not tested in the previous patch. I can 
move this change to the other patch since it's not merged yet. 

https://github.com/llvm/llvm-project/pull/94771
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Lower REDUCE intrinsic with DIM argument (PR #94771)

2024-06-07 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits
Valentin Clement =?utf-8?b?KOODkOODrOODsw=?Message-ID:
In-Reply-To: 


https://github.com/clementval updated 
https://github.com/llvm/llvm-project/pull/94771

>From fd911977863888c7c005f00ae05049b32fe9d4d6 Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Thu, 6 Jun 2024 14:17:44 -0700
Subject: [PATCH 1/2] [flang] Lower REDUCE intrinsic with DIM argument

---
 .../Optimizer/Builder/Runtime/Reduction.h |   7 +
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp |  12 +-
 .../Optimizer/Builder/Runtime/Reduction.cpp   | 186 ++-
 flang/test/Lower/Intrinsics/reduce.f90| 221 ++
 4 files changed, 423 insertions(+), 3 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h 
b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
index 27652208b524e..fedf453a6dc8d 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
@@ -240,6 +240,13 @@ mlir::Value genReduce(fir::FirOpBuilder , 
mlir::Location loc,
   mlir::Value maskBox, mlir::Value identity,
   mlir::Value ordered);
 
+/// Generate call to `Reduce` intrinsic runtime routine. This is the version
+/// that takes arrays of any rank with a dim argument specified.
+void genReduceDim(fir::FirOpBuilder , mlir::Location loc,
+  mlir::Value arrayBox, mlir::Value operation, mlir::Value dim,
+  mlir::Value maskBox, mlir::Value identity,
+  mlir::Value ordered, mlir::Value resultBox);
+
 } // namespace fir::runtime
 
 #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_REDUCTION_H
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp 
b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 0e29849a57688..e250a476b5802 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -5790,7 +5790,17 @@ IntrinsicLibrary::genReduce(mlir::Type resultType,
 return fir::runtime::genReduce(builder, loc, array, operation, mask,
identity, ordered);
   }
-  TODO(loc, "reduce with array result");
+  // Handle cases that have an array result.
+  // Create mutable fir.box to be passed to the runtime for the result.
+  mlir::Type resultArrayType = builder.getVarLenSeqTy(resultType, rank - 1);
+  fir::MutableBoxValue resultMutableBox =
+  fir::factory::createTempMutableBox(builder, loc, resultArrayType);
+  mlir::Value resultIrBox =
+  fir::factory::getMutableIRBox(builder, loc, resultMutableBox);
+  mlir::Value dim = fir::getBase(args[2]);
+  fir::runtime::genReduceDim(builder, loc, array, operation, dim, mask,
+ identity, ordered, resultIrBox);
+  return readAndAddCleanUp(resultMutableBox, resultType, "REDUCE");
 }
 
 // REPEAT
diff --git a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp 
b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
index a7cd53328d69a..e83af63916dcd 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
@@ -486,6 +486,28 @@ struct ForcedReduceReal16 {
   }
 };
 
+/// Placeholder for DIM real*16 version of Reduce Intrinsic
+struct ForcedReduceReal16Dim {
+  static constexpr const char *name =
+  ExpandAndQuoteKey(RTNAME(ReduceReal16Dim));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+return [](mlir::MLIRContext *ctx) {
+  auto ty = mlir::FloatType::getF128(ctx);
+  auto boxTy =
+  fir::runtime::getModel()(ctx);
+  auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+  auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+  auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+  auto refTy = fir::ReferenceType::get(ty);
+  auto refBoxTy = fir::ReferenceType::get(boxTy);
+  auto i1Ty = mlir::IntegerType::get(ctx, 1);
+  return mlir::FunctionType::get(
+  ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, 
i1Ty},
+  {});
+};
+  }
+};
+
 /// Placeholder for integer*16 version of Reduce Intrinsic
 struct ForcedReduceInteger16 {
   static constexpr const char *name =
@@ -506,6 +528,28 @@ struct ForcedReduceInteger16 {
   }
 };
 
+/// Placeholder for DIM integer*16 version of Reduce Intrinsic
+struct ForcedReduceInteger16Dim {
+  static constexpr const char *name =
+  ExpandAndQuoteKey(RTNAME(ReduceInteger16Dim));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+return [](mlir::MLIRContext *ctx) {
+  auto ty = mlir::IntegerType::get(ctx, 128);
+  auto boxTy =
+  fir::runtime::getModel()(ctx);
+  auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+  auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+  auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+  auto refTy = fir::ReferenceType::get(ty);
+  auto refBoxTy = 

[llvm-branch-commits] [flang] [flang] Lower REDUCE intrinsic with DIM argument (PR #94771)

2024-06-07 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits


@@ -527,10 +571,32 @@ struct ForcedReduceComplex10 {
   }
 };
 
+/// Placeholder for Dim complex(10) version of Reduce Intrinsic
+struct ForcedReduceComplex10Dim {
+  static constexpr const char *name =
+  ExpandAndQuoteKey(RTNAME(CppReduceComplex10Dim));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+return [](mlir::MLIRContext *ctx) {
+  auto ty = mlir::ComplexType::get(mlir::FloatType::getF80(ctx));
+  auto boxTy =
+  fir::runtime::getModel()(ctx);
+  auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+  auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+  auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+  auto refTy = fir::ReferenceType::get(ty);
+  auto refBoxTy = fir::ReferenceType::get(boxTy);
+  auto i1Ty = mlir::IntegerType::get(ctx, 1);
+  return mlir::FunctionType::get(
+  ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, 
i1Ty},
+  {});
+};
+  }
+};
+
 /// Placeholder for complex(16) version of Reduce Intrinsic
 struct ForcedReduceComplex16 {
   static constexpr const char *name =
-  ExpandAndQuoteKey(RTNAME(CppReduceComplex16));
+  ExpandAndQuoteKey(RTNAME(CppReduceComplex16Dim));

clementval wrote:

```suggestion
  ExpandAndQuoteKey(RTNAME(CppReduceComplex16));
```

https://github.com/llvm/llvm-project/pull/94771
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Lower REDUCE intrinsic with DIM argument (PR #94771)

2024-06-07 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits


@@ -527,10 +571,32 @@ struct ForcedReduceComplex10 {
   }
 };
 
+/// Placeholder for Dim complex(10) version of Reduce Intrinsic
+struct ForcedReduceComplex10Dim {
+  static constexpr const char *name =
+  ExpandAndQuoteKey(RTNAME(CppReduceComplex10Dim));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+return [](mlir::MLIRContext *ctx) {
+  auto ty = mlir::ComplexType::get(mlir::FloatType::getF80(ctx));
+  auto boxTy =
+  fir::runtime::getModel()(ctx);
+  auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+  auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+  auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+  auto refTy = fir::ReferenceType::get(ty);
+  auto refBoxTy = fir::ReferenceType::get(boxTy);
+  auto i1Ty = mlir::IntegerType::get(ctx, 1);
+  return mlir::FunctionType::get(
+  ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, 
i1Ty},
+  {});
+};
+  }
+};
+
 /// Placeholder for complex(16) version of Reduce Intrinsic
 struct ForcedReduceComplex16 {
   static constexpr const char *name =
-  ExpandAndQuoteKey(RTNAME(CppReduceComplex16));
+  ExpandAndQuoteKey(RTNAME(CppReduceComplex16Dim));

clementval wrote:

No, bad change! Thanks for catching it!

https://github.com/llvm/llvm-project/pull/94771
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Lower REDUCE intrinsic with DIM argument (PR #94771)

2024-06-07 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval created 
https://github.com/llvm/llvm-project/pull/94771

This is a follow up patch to #94652 and handles the lowering of the reduce 
intrinsic with DIM argument and non scalar result. 

>From fd911977863888c7c005f00ae05049b32fe9d4d6 Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Thu, 6 Jun 2024 14:17:44 -0700
Subject: [PATCH] [flang] Lower REDUCE intrinsic with DIM argument

---
 .../Optimizer/Builder/Runtime/Reduction.h |   7 +
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp |  12 +-
 .../Optimizer/Builder/Runtime/Reduction.cpp   | 186 ++-
 flang/test/Lower/Intrinsics/reduce.f90| 221 ++
 4 files changed, 423 insertions(+), 3 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h 
b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
index 27652208b524e..fedf453a6dc8d 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
@@ -240,6 +240,13 @@ mlir::Value genReduce(fir::FirOpBuilder , 
mlir::Location loc,
   mlir::Value maskBox, mlir::Value identity,
   mlir::Value ordered);
 
+/// Generate call to `Reduce` intrinsic runtime routine. This is the version
+/// that takes arrays of any rank with a dim argument specified.
+void genReduceDim(fir::FirOpBuilder , mlir::Location loc,
+  mlir::Value arrayBox, mlir::Value operation, mlir::Value dim,
+  mlir::Value maskBox, mlir::Value identity,
+  mlir::Value ordered, mlir::Value resultBox);
+
 } // namespace fir::runtime
 
 #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_REDUCTION_H
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp 
b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 0e29849a57688..e250a476b5802 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -5790,7 +5790,17 @@ IntrinsicLibrary::genReduce(mlir::Type resultType,
 return fir::runtime::genReduce(builder, loc, array, operation, mask,
identity, ordered);
   }
-  TODO(loc, "reduce with array result");
+  // Handle cases that have an array result.
+  // Create mutable fir.box to be passed to the runtime for the result.
+  mlir::Type resultArrayType = builder.getVarLenSeqTy(resultType, rank - 1);
+  fir::MutableBoxValue resultMutableBox =
+  fir::factory::createTempMutableBox(builder, loc, resultArrayType);
+  mlir::Value resultIrBox =
+  fir::factory::getMutableIRBox(builder, loc, resultMutableBox);
+  mlir::Value dim = fir::getBase(args[2]);
+  fir::runtime::genReduceDim(builder, loc, array, operation, dim, mask,
+ identity, ordered, resultIrBox);
+  return readAndAddCleanUp(resultMutableBox, resultType, "REDUCE");
 }
 
 // REPEAT
diff --git a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp 
b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
index a7cd53328d69a..e83af63916dcd 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
@@ -486,6 +486,28 @@ struct ForcedReduceReal16 {
   }
 };
 
+/// Placeholder for DIM real*16 version of Reduce Intrinsic
+struct ForcedReduceReal16Dim {
+  static constexpr const char *name =
+  ExpandAndQuoteKey(RTNAME(ReduceReal16Dim));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+return [](mlir::MLIRContext *ctx) {
+  auto ty = mlir::FloatType::getF128(ctx);
+  auto boxTy =
+  fir::runtime::getModel()(ctx);
+  auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+  auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+  auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+  auto refTy = fir::ReferenceType::get(ty);
+  auto refBoxTy = fir::ReferenceType::get(boxTy);
+  auto i1Ty = mlir::IntegerType::get(ctx, 1);
+  return mlir::FunctionType::get(
+  ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, 
i1Ty},
+  {});
+};
+  }
+};
+
 /// Placeholder for integer*16 version of Reduce Intrinsic
 struct ForcedReduceInteger16 {
   static constexpr const char *name =
@@ -506,6 +528,28 @@ struct ForcedReduceInteger16 {
   }
 };
 
+/// Placeholder for DIM integer*16 version of Reduce Intrinsic
+struct ForcedReduceInteger16Dim {
+  static constexpr const char *name =
+  ExpandAndQuoteKey(RTNAME(ReduceInteger16Dim));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+return [](mlir::MLIRContext *ctx) {
+  auto ty = mlir::IntegerType::get(ctx, 128);
+  auto boxTy =
+  fir::runtime::getModel()(ctx);
+  auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+  auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+  auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+  auto refTy = 

[llvm-branch-commits] [llvm] [Frontend] Introduce `getDirectiveCategory` for ACC/OMP directives (PR #94689)

2024-06-06 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits


@@ -308,6 +310,7 @@ def ACC_Declare : Directive<"declare"> {
 VersionedClause
   ];
   let association = AS_None;
+  let category = CA_Executable;

clementval wrote:

```suggestion
  let category = CA_Declarative;
```

https://github.com/llvm/llvm-project/pull/94689
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][cuda] Lower device/managed/unified allocation to cuda ops (PR #90526)

2024-04-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval closed 
https://github.com/llvm/llvm-project/pull/90526
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][cuda] Lower device/managed/unified allocation to cuda ops (PR #90526)

2024-04-29 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

clementval wrote:

> Thank you, Valentin!
> 
> Is it expected that we can have a mix of `fir.alloca` and `fir.cuda_alloc` 
> operations in the device routines (e.g. I suppose 
> `fir::FirOpBuilder::createTemporaryAlloc` can generate `fir.alloca` for a 
> temporary location in device code)? It is not necessarily an issue, I just 
> want to understand whether we will have to handle both operations in the 
> device code.

createTemporaryAlloc will also need to be modified to issue 
cuda_alloc/cuda_free. I'm still evaluating the extent of the change. fir.alloca 
are fine in device code as long as they are not device, managed or unified as 
we can support them with the address space. Note that creating managed or 
unified variables in a device subprogram is not recommended. 

https://github.com/llvm/llvm-project/pull/90526
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][cuda] Lower device/managed/unified allocation to cuda ops (PR #90526)

2024-04-29 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval created 
https://github.com/llvm/llvm-project/pull/90526

Lower local allocations of CUDA device, managed and unified variables to 
fir.cuda_alloc. Add fir.cuda_free in the function context finalization. 

>From 02d1ef45cae1ba973a51e5898f092403395c Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Mon, 29 Apr 2024 14:30:46 -0700
Subject: [PATCH] [flang][cuda] Lower device/managed/unified allocation to cuda
 ops

---
 .../flang/Optimizer/Builder/FIRBuilder.h  |  7 +
 flang/include/flang/Semantics/tools.h | 17 +++
 flang/lib/Lower/ConvertVariable.cpp   | 29 +++
 flang/lib/Optimizer/Builder/FIRBuilder.cpp| 25 +---
 flang/lib/Optimizer/Dialect/FIROps.cpp| 15 ++
 flang/test/Lower/CUDA/cuda-data-attribute.cuf | 25 
 6 files changed, 107 insertions(+), 11 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h 
b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index e4c954159f71be..0d650f830b64e0 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -708,6 +708,13 @@ mlir::Value createNullBoxProc(fir::FirOpBuilder , 
mlir::Location loc,
 
 /// Set internal linkage attribute on a function.
 void setInternalLinkage(mlir::func::FuncOp);
+
+llvm::SmallVector
+elideExtentsAlreadyInType(mlir::Type type, mlir::ValueRange shape);
+
+llvm::SmallVector
+elideLengthsAlreadyInType(mlir::Type type, mlir::ValueRange lenParams);
+
 } // namespace fir::factory
 
 #endif // FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H
diff --git a/flang/include/flang/Semantics/tools.h 
b/flang/include/flang/Semantics/tools.h
index da10969ebc7021..c9eb5bc857ac01 100644
--- a/flang/include/flang/Semantics/tools.h
+++ b/flang/include/flang/Semantics/tools.h
@@ -222,6 +222,23 @@ inline bool HasCUDAAttr(const Symbol ) {
   return false;
 }
 
+inline bool NeedCUDAAlloc(const Symbol ) {
+  bool inDeviceSubprogram{IsCUDADeviceContext(())};
+  if (const auto *details{
+  sym.GetUltimate().detailsIf()}) {
+if (details->cudaDataAttr() &&
+(*details->cudaDataAttr() == common::CUDADataAttr::Device ||
+*details->cudaDataAttr() == common::CUDADataAttr::Managed ||
+*details->cudaDataAttr() == common::CUDADataAttr::Unified)) {
+  // Descriptor is allocated on host when in host context.
+  if (Fortran::semantics::IsAllocatable(sym))
+return inDeviceSubprogram;
+  return true;
+}
+  }
+  return false;
+}
+
 const Scope *FindCUDADeviceContext(const Scope *);
 std::optional GetCUDADataAttr(const Symbol *);
 
diff --git a/flang/lib/Lower/ConvertVariable.cpp 
b/flang/lib/Lower/ConvertVariable.cpp
index 21db0cac11bf6a..9a17acf5b15c36 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -693,6 +693,22 @@ static mlir::Value 
createNewLocal(Fortran::lower::AbstractConverter ,
   if (ultimateSymbol.test(Fortran::semantics::Symbol::Flag::CrayPointee))
 return builder.create(loc, fir::ReferenceType::get(ty));
 
+  if (Fortran::semantics::NeedCUDAAlloc(ultimateSymbol)) {
+fir::CUDADataAttributeAttr cudaAttr =
+Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
+ ultimateSymbol);
+llvm::SmallVector indices;
+llvm::SmallVector elidedShape =
+fir::factory::elideExtentsAlreadyInType(ty, shape);
+llvm::SmallVector elidedLenParams =
+fir::factory::elideLengthsAlreadyInType(ty, lenParams);
+auto idxTy = builder.getIndexType();
+for (mlir::Value sh : elidedShape)
+  indices.push_back(builder.createConvert(loc, idxTy, sh));
+return builder.create(loc, ty, nm, symNm, cudaAttr,
+lenParams, indices);
+  }
+
   // Let the builder do all the heavy lifting.
   if (!Fortran::semantics::IsProcedurePointer(ultimateSymbol))
 return builder.allocateLocal(loc, ty, nm, symNm, shape, lenParams, isTarg);
@@ -927,6 +943,19 @@ static void 
instantiateLocal(Fortran::lower::AbstractConverter ,
   });
 }
   }
+  if (Fortran::semantics::NeedCUDAAlloc(var.getSymbol())) {
+auto *builder = ();
+mlir::Location loc = converter.getCurrentLocation();
+fir::ExtendedValue exv =
+converter.getSymbolExtendedValue(var.getSymbol(), );
+auto *sym = ();
+converter.getFctCtx().attachCleanup([builder, loc, exv, sym]() {
+  fir::CUDADataAttributeAttr cudaAttr =
+  Fortran::lower::translateSymbolCUDADataAttribute(
+  builder->getContext(), *sym);
+  builder->create(loc, fir::getBase(exv), cudaAttr);
+});
+  }
 }
 
 //======//
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp 
b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index a0fbae5b614cc7..a813b646087d73 100644
--- 

[llvm-branch-commits] [flang] [flang][OpenMP] simplify getReductionName (PR #85666)

2024-03-18 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

clementval wrote:

Great to see this re-used!

https://github.com/llvm/llvm-project/pull/85666
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] run CFG conversion on omp reduction declare ops (PR #84953)

2024-03-14 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

clementval wrote:

> > Wouldn't it be cleaner to expose the patterns via a 
> > `populateFirCfgConversionPatterns` function and reuse it in you extra pass 
> > instead of making two pass from the initial file?
> > We did this recently for the FirToLLVM patterns. #83492
> 
> Thanks for taking a look at this. I have extracted the patterns into an 
> externally visible function, but I don't understand the benefit of splitting 
> the two CFG conversion passes out into different files. I think this could be 
> confusing because it would be unclear which file one should contain the 
> definition of those conversion patterns. Keeping it in one file makes it 
> clear that both do exactly the same thing on different target operations.

Wouldn't applying the patterns on the module in a single pass work here as 
well? 

https://github.com/llvm/llvm-project/pull/84953
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][CodeGen] Run PreCGRewrite on omp reduction declare ops (PR #84954)

2024-03-14 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval edited 
https://github.com/llvm/llvm-project/pull/84954
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][CodeGen] Run PreCGRewrite on omp reduction declare ops (PR #84954)

2024-03-14 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval edited 
https://github.com/llvm/llvm-project/pull/84954
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][CodeGen] Run PreCGRewrite on omp reduction declare ops (PR #84954)

2024-03-14 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval approved this pull request.

LGTM. Just a nit comment for even cleaner code. 

https://github.com/llvm/llvm-project/pull/84954
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][CodeGen] Run PreCGRewrite on omp reduction declare ops (PR #84954)

2024-03-14 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits


@@ -314,11 +314,8 @@ class CodeGenRewrite : public 
fir::impl::CodeGenRewriteBase {
 
   void runOnOperation() override final {
 // Call runOn on all top level regions that may contain 
emboxOp/arrayCoorOp.
-auto mod = getOperation();
-for (auto func : mod.getOps())
-  runOn(func, func.getBody());
-for (auto global : mod.getOps())
-  runOn(global, global.getRegion());
+mlir::ModuleOp mod = getOperation();
+runOn(mod);

clementval wrote:

nit: merge `runOn()` into `runOnOperation()` since it has only a single use now. 

https://github.com/llvm/llvm-project/pull/84954
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] run CFG conversion on omp reduction declare ops (PR #84953)

2024-03-13 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

clementval wrote:

Wouldn't it be cleaner to expose the patterns via a 
`populateFirCfgConversionPatterns` function and reuse it in your extra pass 
instead of making two passes from the initial file?

We did this recently for the FirToLLVM patterns. 
https://github.com/llvm/llvm-project/pull/83492
 

https://github.com/llvm/llvm-project/pull/84953
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][CodeGen] Run PreCGRewrite on omp reduction declare ops (PR #84954)

2024-03-13 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits


@@ -319,6 +320,10 @@ class CodeGenRewrite : public 
fir::impl::CodeGenRewriteBase {
   runOn(func, func.getBody());
 for (auto global : mod.getOps())
   runOn(global, global.getRegion());
+for (auto omp : mod.getOps()) {

clementval wrote:

Wouldn't applying the patterns on the module directly work for all operations 
in it? So we could get rid of the call to runOn. 

https://github.com/llvm/llvm-project/pull/84954
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][CodeGen] Run PreCGRewrite on omp reduction declare ops (PR #84954)

2024-03-13 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits


@@ -22,6 +22,7 @@
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
+#include 

clementval wrote:

Why <> instead of ""?

https://github.com/llvm/llvm-project/pull/84954
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][NFC] move extractSequenceType helper out of OpenACC to share code (PR #84957)

2024-03-12 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval approved this pull request.


https://github.com/llvm/llvm-project/pull/84957
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [Flang][OpenMP][MLIR] Initial derived type member map support (PR #82853)

2024-02-23 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

clementval wrote:

> Yes, I am sorry about that, I'd like to make it as easy as possible to review 
> for you all as I know it's not an easy task and I greatly appreciate the 
> effort you all put in, so I will most definitely keep it mind in the future!

Thanks! 


https://github.com/llvm/llvm-project/pull/82853
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [Flang][OpenMP][MLIR] Initial derived type member map support (PR #82853)

2024-02-23 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

clementval wrote:

> > You could do the renaming of the pass as an NFC PR so it would make the non 
> > NFC changes more obvious to spot at. Not necessary to do it for this time 
> > but next time it would be nice.
> 
> I'm very sorry about that! I didn't realise it would be a problem, but I will 
> keep it in mind for the future for any renaming + change sets I aim to 
> upstream. Thank you for bringing it up.

Not an issue at all. It's just simpler to look at small PRs :-) 

https://github.com/llvm/llvm-project/pull/82853
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [Flang][OpenMP][MLIR] Initial derived type member map support (PR #82853)

2024-02-23 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

clementval wrote:

You could do the renaming of the pass as an NFC PR so it would make the non-NFC 
changes easier to spot. Not necessary to do it for this time but next 
time it would be nice. 

https://github.com/llvm/llvm-project/pull/82853
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [llvm] [flang][OpenMP] Main splitting functionality dev-complete (PR #82003)

2024-02-19 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits


@@ -48,6 +49,29 @@ using DeclareTargetCapturePair =
 // Common helper functions
 
//===--===//
 
+static llvm::ArrayRef getWorksharing() {
+  static llvm::omp::Directive worksharing[] = {
+  llvm::omp::Directive::OMPD_do, llvm::omp::Directive::OMPD_for,
+  llvm::omp::Directive::OMPD_scope,  llvm::omp::Directive::OMPD_sections,
+  llvm::omp::Directive::OMPD_single, llvm::omp::Directive::OMPD_workshare,
+  };
+  return worksharing;
+}
+
+static llvm::ArrayRef getWorksharingLoop() {
+  static llvm::omp::Directive worksharingLoop[] = {
+  llvm::omp::Directive::OMPD_do,
+  llvm::omp::Directive::OMPD_for,
+  };
+  return worksharingLoop;
+}
+
+static uint32_t getOpenMPVersion(const mlir::ModuleOp ) {
+  if (mlir::Attribute verAttr = mod->getAttr("omp.version"))
+return llvm::cast(verAttr).getVersion();
+  llvm_unreachable("Exoecting OpenMP version attribute in module");

clementval wrote:

```suggestion
  llvm_unreachable("Expecting OpenMP version attribute in module");
```

https://github.com/llvm/llvm-project/pull/82003
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Add support for copyprivate (PR #80485)

2024-02-02 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits


@@ -1092,6 +1040,79 @@ class FirConverter : public 
Fortran::lower::AbstractConverter {
 return true;
   }
 
+  void copyVar(const Fortran::semantics::Symbol ,
+   const Fortran::lower::SymbolBox _sb,
+   const Fortran::lower::SymbolBox _sb) {
+mlir::Location loc = genLocation(sym.name());
+if (lowerToHighLevelFIR())
+  copyVarHLFIR(loc, lhs_sb.getAddr(), rhs_sb.getAddr());
+else
+  copyVarFIR(loc, sym, lhs_sb, rhs_sb);
+  }
+
+  void copyVarHLFIR(mlir::Location loc, mlir::Value dst, mlir::Value src) {
+assert(lowerToHighLevelFIR());
+hlfir::Entity lhs{dst};
+hlfir::Entity rhs{src};
+// Temporary_lhs is set to true in hlfir.assign below to avoid user
+// assignment to be used and finalization to be called on the LHS.
+// This may or may not be correct but mimics the current behaviour
+// without HLFIR.
+auto copyData = [&](hlfir::Entity l, hlfir::Entity r) {
+  // Dereference RHS and load it if trivial scalar.
+  r = hlfir::loadTrivialScalar(loc, *builder, r);
+  builder->create(
+  loc, r, l,
+  /*isWholeAllocatableAssignment=*/false,
+  /*keepLhsLengthInAllocatableAssignment=*/false,
+  /*temporary_lhs=*/true);
+};
+if (lhs.isAllocatable()) {
+  // Deep copy allocatable if it is allocated.
+  // Note that when allocated, the RHS is already allocated with the LHS
+  // shape for copy on entry in createHostAssociateVarClone.
+  // For lastprivate, this assumes that the RHS was not reallocated in
+  // the OpenMP region.
+  lhs = hlfir::derefPointersAndAllocatables(loc, *builder, lhs);
+  mlir::Value addr = hlfir::genVariableRawAddress(loc, *builder, lhs);
+  mlir::Value isAllocated = builder->genIsNotNullAddr(loc, addr);
+  builder->genIfThen(loc, isAllocated)
+  .genThen([&]() {
+// Copy the DATA, not the descriptors.
+copyData(lhs, rhs);
+  })
+  .end();
+} else if (lhs.isPointer()) {
+  // Set LHS target to the target of RHS (do not copy the RHS
+  // target data into the LHS target storage).
+  auto loadVal = builder->create(loc, rhs);
+  builder->create(loc, loadVal, lhs);
+} else {
+  // Non ALLOCATABLE/POINTER variable. Simple DATA copy.
+  copyData(lhs, rhs);
+}
+  }
+
+  void copyVarFIR(mlir::Location loc, const Fortran::semantics::Symbol ,
+  const Fortran::lower::SymbolBox _sb,
+  const Fortran::lower::SymbolBox _sb) {
+assert(!lowerToHighLevelFIR());
+fir::ExtendedValue lhs = symBoxToExtendedValue(lhs_sb);
+fir::ExtendedValue rhs = symBoxToExtendedValue(rhs_sb);
+mlir::Type symType = genType(sym);
+if (auto seqTy = symType.dyn_cast()) {
+  Fortran::lower::StatementContext stmtCtx;
+  Fortran::lower::createSomeArrayAssignment(*this, lhs, rhs, localSymbols,
+stmtCtx);
+  stmtCtx.finalizeAndReset();
+} else if (lhs.getBoxOf()) {
+  fir::factory::CharacterExprHelper{*builder, loc}.createAssign(lhs, rhs);
+} else {
+  auto loadVal = builder->create(loc, fir::getBase(rhs));
+  builder->create(loc, loadVal, fir::getBase(lhs));
+}
+  }
+

clementval wrote:

Could this be in OpenMP.cpp? It would remove the need to add a function to the 
converter. 

https://github.com/llvm/llvm-project/pull/80485
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openacc] Carry device dependent info for acc routine in the module file (PR #77804)

2024-01-11 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits
Valentin Clement =?utf-8?b?KOODkOODrOODsw=?Message-ID:
In-Reply-To: 


clementval wrote:

For some reason this PR has been closed when I merged the parent PR. I'll apply 
the suggestion and push the commit manually. 

https://github.com/llvm/llvm-project/pull/77804
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openacc] Carry device dependent info for acc routine in the module file (PR #77804)

2024-01-11 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits
Valentin Clement =?utf-8?b?KOODkOODrOODsw=?Message-ID:
In-Reply-To: 


https://github.com/clementval closed 
https://github.com/llvm/llvm-project/pull/77804
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openacc] Carry device dependent info for acc routine in the module file (PR #77804)

2024-01-11 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits
Valentin Clement =?utf-8?b?KOODkOODrOODsw=?Message-ID:
In-Reply-To: 


https://github.com/clementval updated 
https://github.com/llvm/llvm-project/pull/77804

>From d417332a130814e34fcc2448430d71b0fd5376b5 Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Wed, 10 Jan 2024 21:26:53 -0800
Subject: [PATCH 1/2] [flang][openacc] Carry device dependent info for routine
 in the module file

---
 flang/include/flang/Common/Fortran.h |  4 ++
 flang/include/flang/Parser/dump-parse-tree.h |  3 +-
 flang/include/flang/Parser/parse-tree.h  |  4 +-
 flang/include/flang/Semantics/symbol.h   | 29 +--
 flang/lib/Lower/OpenACC.cpp  | 16 +++---
 flang/lib/Parser/openacc-parsers.cpp | 14 ++---
 flang/lib/Semantics/mod-file.cpp | 54 ++--
 flang/lib/Semantics/resolve-directives.cpp   | 50 +++---
 flang/test/Semantics/OpenACC/acc-module.f90  | 13 +
 9 files changed, 142 insertions(+), 45 deletions(-)

diff --git a/flang/include/flang/Common/Fortran.h 
b/flang/include/flang/Common/Fortran.h
index 4007bfc7994f98..1d3a85e2500733 100644
--- a/flang/include/flang/Common/Fortran.h
+++ b/flang/include/flang/Common/Fortran.h
@@ -87,6 +87,10 @@ ENUM_CLASS(CUDASubprogramAttrs, Host, Device, HostDevice, 
Global, Grid_Global)
 // CUDA data attributes; mutually exclusive
 ENUM_CLASS(CUDADataAttr, Constant, Device, Managed, Pinned, Shared, Texture)
 
+// OpenACC device types
+ENUM_CLASS(
+OpenACCDeviceType, Star, Default, Nvidia, Radeon, Host, Multicore, None)
+
 // OpenMP atomic_default_mem_order clause allowed values
 ENUM_CLASS(OmpAtomicDefaultMemOrderType, SeqCst, AcqRel, Relaxed)
 
diff --git a/flang/include/flang/Parser/dump-parse-tree.h 
b/flang/include/flang/Parser/dump-parse-tree.h
index 1defbf132327c4..d067a7273540f0 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -48,6 +48,7 @@ class ParseTreeDumper {
   NODE(std, uint64_t)
   NODE_ENUM(common, CUDADataAttr)
   NODE_ENUM(common, CUDASubprogramAttrs)
+  NODE_ENUM(common, OpenACCDeviceType)
   NODE(format, ControlEditDesc)
   NODE(format::ControlEditDesc, Kind)
   NODE(format, DerivedTypeDataEditDesc)
@@ -101,7 +102,7 @@ class ParseTreeDumper {
   NODE(parser, AccSelfClause)
   NODE(parser, AccStandaloneDirective)
   NODE(parser, AccDeviceTypeExpr)
-  NODE_ENUM(parser::AccDeviceTypeExpr, Device)
+
   NODE(parser, AccDeviceTypeExprList)
   NODE(parser, AccTileExpr)
   NODE(parser, AccTileExprList)
diff --git a/flang/include/flang/Parser/parse-tree.h 
b/flang/include/flang/Parser/parse-tree.h
index 71195f2bb9ddc4..e9bfb728a2bef6 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4072,8 +4072,8 @@ struct AccWaitArgument {
 };
 
 struct AccDeviceTypeExpr {
-  ENUM_CLASS(Device, Star, Default, Nvidia, Radeon, Host, Multicore)
-  WRAPPER_CLASS_BOILERPLATE(AccDeviceTypeExpr, Device);
+  WRAPPER_CLASS_BOILERPLATE(
+  AccDeviceTypeExpr, Fortran::common::OpenACCDeviceType);
   CharBlock source;
 };
 
diff --git a/flang/include/flang/Semantics/symbol.h 
b/flang/include/flang/Semantics/symbol.h
index f6f195b6bb95b2..2b6b18e40e6528 100644
--- a/flang/include/flang/Semantics/symbol.h
+++ b/flang/include/flang/Semantics/symbol.h
@@ -112,7 +112,8 @@ class WithBindName {
   bool isExplicitBindName_{false};
 };
 
-class OpenACCRoutineInfo {
+// Device type specific OpenACC routine information
+class OpenACCRoutineDeviceTypeInfo {
 public:
   bool isSeq() const { return isSeq_; }
   void set_isSeq(bool value = true) { isSeq_ = value; }
@@ -124,12 +125,12 @@ class OpenACCRoutineInfo {
   void set_isGang(bool value = true) { isGang_ = value; }
   unsigned gangDim() const { return gangDim_; }
   void set_gangDim(unsigned value) { gangDim_ = value; }
-  bool isNohost() const { return isNohost_; }
-  void set_isNohost(bool value = true) { isNohost_ = value; }
   const std::string *bindName() const {
 return bindName_ ? &*bindName_ : nullptr;
   }
   void set_bindName(std::string &) { bindName_ = std::move(name); }
+  void set_dType(Fortran::common::OpenACCDeviceType dType) { dType_ = dType; }
+  Fortran::common::OpenACCDeviceType dType() const { return dType_; }
 
 private:
   bool isSeq_{false};
@@ -137,8 +138,28 @@ class OpenACCRoutineInfo {
   bool isWorker_{false};
   bool isGang_{false};
   unsigned gangDim_{0};
-  bool isNohost_{false};
   std::optional bindName_;
+  Fortran::common::OpenACCDeviceType dType_{
+  Fortran::common::OpenACCDeviceType::None};
+};
+
+// OpenACC routine information. Device independent info are stored on the
+// OpenACCRoutineInfo instance while device dependent info are stored
+// in as objects in the OpenACCRoutineDeviceTypeInfo list.
+class OpenACCRoutineInfo : public OpenACCRoutineDeviceTypeInfo {
+public:
+  bool isNohost() const { return isNohost_; }
+  void set_isNohost(bool value = true) { isNohost_ = value; }
+  std::list () {
+return 

[llvm-branch-commits] [flang] [flang][openacc] Carry device dependent info for acc routine in the module file (PR #77804)

2024-01-11 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits


@@ -124,21 +125,41 @@ class OpenACCRoutineInfo {
   void set_isGang(bool value = true) { isGang_ = value; }
   unsigned gangDim() const { return gangDim_; }
   void set_gangDim(unsigned value) { gangDim_ = value; }
-  bool isNohost() const { return isNohost_; }
-  void set_isNohost(bool value = true) { isNohost_ = value; }
   const std::string *bindName() const {
 return bindName_ ? &*bindName_ : nullptr;
   }
   void set_bindName(std::string &&name) { bindName_ = std::move(name); }
+  void set_dType(Fortran::common::OpenACCDeviceType dType) { dType_ = dType; }
+  Fortran::common::OpenACCDeviceType dType() const { return dType_; }
 
 private:
   bool isSeq_{false};
   bool isVector_{false};
   bool isWorker_{false};
   bool isGang_{false};
   unsigned gangDim_{0};
-  bool isNohost_{false};
   std::optional<std::string> bindName_;
+  Fortran::common::OpenACCDeviceType dType_{
+  Fortran::common::OpenACCDeviceType::None};
+};
+
+// OpenACC routine information. Device independent info are stored on the
+// OpenACCRoutineInfo instance while device dependent info are stored
+// in as objects in the OpenACCRoutineDeviceTypeInfo list.

clementval wrote:

```suggestion
// OpenACCRoutineInfo instance while device dependent info are stored
// as objects in the OpenACCRoutineDeviceTypeInfo list.
```

https://github.com/llvm/llvm-project/pull/77804
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openacc] Carry device dependent info for acc routine in the module file (PR #77804)

2024-01-11 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval created 
https://github.com/llvm/llvm-project/pull/77804

- Move the DeviceType enumeration to `Fortran::common` so it can be used 
outside of the parser. 
- Store the device_type dependent information on the symbol and reproduce it 
in the module file. 

>From d417332a130814e34fcc2448430d71b0fd5376b5 Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Wed, 10 Jan 2024 21:26:53 -0800
Subject: [PATCH] [flang][openacc] Carry device dependent info for routine in
 the module file

---
 flang/include/flang/Common/Fortran.h |  4 ++
 flang/include/flang/Parser/dump-parse-tree.h |  3 +-
 flang/include/flang/Parser/parse-tree.h  |  4 +-
 flang/include/flang/Semantics/symbol.h   | 29 +--
 flang/lib/Lower/OpenACC.cpp  | 16 +++---
 flang/lib/Parser/openacc-parsers.cpp | 14 ++---
 flang/lib/Semantics/mod-file.cpp | 54 ++--
 flang/lib/Semantics/resolve-directives.cpp   | 50 +++---
 flang/test/Semantics/OpenACC/acc-module.f90  | 13 +
 9 files changed, 142 insertions(+), 45 deletions(-)

diff --git a/flang/include/flang/Common/Fortran.h 
b/flang/include/flang/Common/Fortran.h
index 4007bfc7994f98..1d3a85e2500733 100644
--- a/flang/include/flang/Common/Fortran.h
+++ b/flang/include/flang/Common/Fortran.h
@@ -87,6 +87,10 @@ ENUM_CLASS(CUDASubprogramAttrs, Host, Device, HostDevice, 
Global, Grid_Global)
 // CUDA data attributes; mutually exclusive
 ENUM_CLASS(CUDADataAttr, Constant, Device, Managed, Pinned, Shared, Texture)
 
+// OpenACC device types
+ENUM_CLASS(
+OpenACCDeviceType, Star, Default, Nvidia, Radeon, Host, Multicore, None)
+
 // OpenMP atomic_default_mem_order clause allowed values
 ENUM_CLASS(OmpAtomicDefaultMemOrderType, SeqCst, AcqRel, Relaxed)
 
diff --git a/flang/include/flang/Parser/dump-parse-tree.h 
b/flang/include/flang/Parser/dump-parse-tree.h
index 1defbf132327c4..d067a7273540f0 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -48,6 +48,7 @@ class ParseTreeDumper {
   NODE(std, uint64_t)
   NODE_ENUM(common, CUDADataAttr)
   NODE_ENUM(common, CUDASubprogramAttrs)
+  NODE_ENUM(common, OpenACCDeviceType)
   NODE(format, ControlEditDesc)
   NODE(format::ControlEditDesc, Kind)
   NODE(format, DerivedTypeDataEditDesc)
@@ -101,7 +102,7 @@ class ParseTreeDumper {
   NODE(parser, AccSelfClause)
   NODE(parser, AccStandaloneDirective)
   NODE(parser, AccDeviceTypeExpr)
-  NODE_ENUM(parser::AccDeviceTypeExpr, Device)
+
   NODE(parser, AccDeviceTypeExprList)
   NODE(parser, AccTileExpr)
   NODE(parser, AccTileExprList)
diff --git a/flang/include/flang/Parser/parse-tree.h 
b/flang/include/flang/Parser/parse-tree.h
index 71195f2bb9ddc4..e9bfb728a2bef6 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4072,8 +4072,8 @@ struct AccWaitArgument {
 };
 
 struct AccDeviceTypeExpr {
-  ENUM_CLASS(Device, Star, Default, Nvidia, Radeon, Host, Multicore)
-  WRAPPER_CLASS_BOILERPLATE(AccDeviceTypeExpr, Device);
+  WRAPPER_CLASS_BOILERPLATE(
+  AccDeviceTypeExpr, Fortran::common::OpenACCDeviceType);
   CharBlock source;
 };
 
diff --git a/flang/include/flang/Semantics/symbol.h 
b/flang/include/flang/Semantics/symbol.h
index f6f195b6bb95b2..2b6b18e40e6528 100644
--- a/flang/include/flang/Semantics/symbol.h
+++ b/flang/include/flang/Semantics/symbol.h
@@ -112,7 +112,8 @@ class WithBindName {
   bool isExplicitBindName_{false};
 };
 
-class OpenACCRoutineInfo {
+// Device type specific OpenACC routine information
+class OpenACCRoutineDeviceTypeInfo {
 public:
   bool isSeq() const { return isSeq_; }
   void set_isSeq(bool value = true) { isSeq_ = value; }
@@ -124,12 +125,12 @@ class OpenACCRoutineInfo {
   void set_isGang(bool value = true) { isGang_ = value; }
   unsigned gangDim() const { return gangDim_; }
   void set_gangDim(unsigned value) { gangDim_ = value; }
-  bool isNohost() const { return isNohost_; }
-  void set_isNohost(bool value = true) { isNohost_ = value; }
   const std::string *bindName() const {
 return bindName_ ? &*bindName_ : nullptr;
   }
   void set_bindName(std::string &&name) { bindName_ = std::move(name); }
+  void set_dType(Fortran::common::OpenACCDeviceType dType) { dType_ = dType; }
+  Fortran::common::OpenACCDeviceType dType() const { return dType_; }
 
 private:
   bool isSeq_{false};
@@ -137,8 +138,28 @@ class OpenACCRoutineInfo {
   bool isWorker_{false};
   bool isGang_{false};
   unsigned gangDim_{0};
-  bool isNohost_{false};
   std::optional<std::string> bindName_;
+  Fortran::common::OpenACCDeviceType dType_{
+  Fortran::common::OpenACCDeviceType::None};
+};
+
+// OpenACC routine information. Device independent info are stored on the
+// OpenACCRoutineInfo instance while device dependent info are stored
+// as objects in the OpenACCRoutineDeviceTypeInfo list.
+class OpenACCRoutineInfo : public OpenACCRoutineDeviceTypeInfo {
+public:
+  bool