agozillon updated this revision to Diff 509837.
agozillon added a comment.
Squash commits, format and apply requested changes
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D145264/new/
https://reviews.llvm.org/D145264
Files:
clang/include/clang/Driver/Options.td
clang/lib/Driver/ToolChains/Flang.cpp
flang/include/flang/Frontend/LangOptions.def
flang/include/flang/Tools/CrossToolHelpers.h
flang/lib/Frontend/CompilerInvocation.cpp
flang/lib/Frontend/FrontendActions.cpp
flang/test/Driver/driver-help.f90
flang/test/Driver/omp-frontend-forwarding.f90
flang/test/Lower/OpenMP/rtl-flags.f90
flang/tools/bbc/bbc.cpp
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Index: mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
===================================================================
--- mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1542,6 +1542,38 @@
return bodyGenStatus;
}
+/// Lowers the FlagsAttr which is applied to the module on the device
+/// pass when offloading, this attribute contains OpenMP RTL globals that can
+/// be passed as flags to the frontend, otherwise they are set to default
+LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+
+ ompBuilder->createGlobalFlag(
+ attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
+ "__omp_rtl_debug_kind");
+ ompBuilder->createGlobalFlag(
+ attribute
+ .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
+ ,
+ "__omp_rtl_assume_teams_oversubscription");
+ ompBuilder->createGlobalFlag(
+ attribute
+ .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
+ ,
+ "__omp_rtl_assume_threads_oversubscription");
+ ompBuilder->createGlobalFlag(
+ attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
+ "__omp_rtl_assume_no_thread_state");
+ ompBuilder->createGlobalFlag(
+ attribute
+ .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
+ ,
+ "__omp_rtl_assume_no_nested_parallelism");
+
+ return success();
+}
+
namespace {
/// Implementation of the dialect interface that converts operations belonging
@@ -1556,10 +1588,34 @@
LogicalResult
convertOperation(Operation *op, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) const final;
+
+ LogicalResult
+ amendOperation(Operation *op, NamedAttribute attribute,
+ LLVM::ModuleTranslation &moduleTranslation) const final;
};
} // namespace
+/// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR, runtime
+/// calls, or operation amendments
+LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
+ Operation *op, NamedAttribute attribute,
+ LLVM::ModuleTranslation &moduleTranslation) const {
+
+ return llvm::TypeSwitch<Attribute, LogicalResult>(attribute.getValue())
+ .Case([&](mlir::omp::FlagsAttr rtlAttr) {
+ return convertFlagsAttr(op, rtlAttr, moduleTranslation);
+ })
+ .Default([&](Attribute attr) {
+ // fall through for omp attributes that do not require lowering and/or
+ // have no concrete definition and thus no type to define a case on
+ // e.g. omp.is_device
+ return success();
+ });
+
+ return failure();
+}
+
/// Given an OpenMP MLIR operation, create the corresponding LLVM IR
/// (including OpenMP runtime calls).
LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
Index: flang/tools/bbc/bbc.cpp
===================================================================
--- flang/tools/bbc/bbc.cpp
+++ flang/tools/bbc/bbc.cpp
@@ -129,6 +129,38 @@
llvm::cl::desc("enable openmp device compilation"),
llvm::cl::init(false));
+// A simplified subset of the OpenMP RTL Flags from Flang, only the primary
+// positive options are available, no negative options e.g. fopen_assume* vs
+// fno_open_assume*
+static llvm::cl::opt<uint32_t> setOpenMPTargetDebug(
+ "fopenmp-target-debug",
+ llvm::cl::desc("Enable debugging in the OpenMP offloading device RTL"),
+ llvm::cl::init(0));
+
+static llvm::cl::opt<bool> setOpenMPThreadSubscription(
+ "fopenmp-assume-threads-oversubscription",
+ llvm::cl::desc("Assume work-shared loops do not have more "
+ "iterations than participating threads."),
+ llvm::cl::init(false));
+
+static llvm::cl::opt<bool> setOpenMPTeamSubscription(
+ "fopenmp-assume-teams-oversubscription",
+ llvm::cl::desc("Assume distributed loops do not have more iterations than "
+ "participating teams."),
+ llvm::cl::init(false));
+
+static llvm::cl::opt<bool> setOpenMPNoThreadState(
+ "fopenmp-assume-no-thread-state",
+ llvm::cl::desc(
+ "Assume that no thread in a parallel region will modify an ICV."),
+ llvm::cl::init(false));
+
+static llvm::cl::opt<bool> setOpenMPNoNestedParallelism(
+ "fopenmp-assume-no-nested-parallelism",
+ llvm::cl::desc("Assume that no thread in a parallel region will encounter "
+ "a parallel region."),
+ llvm::cl::init(false));
+
static llvm::cl::opt<bool> enableOpenACC("fopenacc",
llvm::cl::desc("enable openacc"),
llvm::cl::init(false));
@@ -244,8 +276,13 @@
kindMap, loweringOptions, {});
burnside.lower(parseTree, semanticsContext);
mlir::ModuleOp mlirModule = burnside.getModule();
- if (enableOpenMP)
- setOffloadModuleInterfaceAttributes(mlirModule, enableOpenMPDevice);
+ if (enableOpenMP) {
+ auto offloadModuleOpts =
+ OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription,
+ setOpenMPThreadSubscription, setOpenMPNoThreadState,
+ setOpenMPNoNestedParallelism, enableOpenMPDevice);
+ setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts);
+ }
std::error_code ec;
std::string outputName = outputFilename;
if (!outputName.size())
Index: flang/test/Lower/OpenMP/rtl-flags.f90
===================================================================
--- /dev/null
+++ flang/test/Lower/OpenMP/rtl-flags.f90
@@ -0,0 +1,84 @@
+!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DEFAULT-DEVICE-FIR
+!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s --check-prefix=DEFAULT-HOST-FIR
+!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DEFAULT-DEVICE-LLVM
+!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-target-debug -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DBG-DEVICE-FIR
+!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-target-debug -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DBG-DEVICE-LLVM
+!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-target-debug=111 -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DBG-EQ-DEVICE-FIR
+!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-target-debug=111 -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DBG-EQ-DEVICE-LLVM
+!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-assume-teams-oversubscription -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=TEAMS-OSUB-DEVICE-FIR
+!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-assume-teams-oversubscription -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=TEAMS-OSUB-DEVICE-LLVM
+!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-assume-threads-oversubscription -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=THREAD-OSUB-DEVICE-FIR
+!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-assume-threads-oversubscription -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=THREAD-OSUB-DEVICE-LLVM
+!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-assume-no-thread-state -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=THREAD-STATE-DEVICE-FIR
+!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-assume-no-thread-state -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=THREAD-STATE-DEVICE-LLVM
+!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-assume-no-nested-parallelism -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=NEST-PAR-DEVICE-FIR
+!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-assume-no-nested-parallelism -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=NEST-PAR-DEVICE-LLVM
+!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-target-debug -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism -fopenmp-assume-threads-oversubscription -fopenmp-assume-no-thread-state -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=ALL-DEVICE-FIR
+!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-target-debug -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism -fopenmp-assume-threads-oversubscription -fopenmp-assume-no-thread-state -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=ALL-DEVICE-LLVM
+!RUN: bbc -emit-fir -fopenmp -fopenmp-is-device -o - %s | FileCheck %s --check-prefix=DEFAULT-DEVICE-FIR
+!RUN: bbc -emit-fir -fopenmp -o - %s | FileCheck %s --check-prefix=DEFAULT-HOST-FIR
+!RUN: bbc -emit-fir -fopenmp -fopenmp-target-debug=111 -fopenmp-is-device -o - %s | FileCheck %s --check-prefix=DBG-EQ-DEVICE-FIR
+!RUN: bbc -emit-fir -fopenmp -fopenmp-assume-teams-oversubscription -fopenmp-is-device -o - %s | FileCheck %s --check-prefix=TEAMS-OSUB-DEVICE-FIR
+!RUN: bbc -emit-fir -fopenmp -fopenmp-assume-threads-oversubscription -fopenmp-is-device -o - %s | FileCheck %s --check-prefix=THREAD-OSUB-DEVICE-FIR
+!RUN: bbc -emit-fir -fopenmp -fopenmp-assume-no-thread-state -fopenmp-is-device -o - %s | FileCheck %s --check-prefix=THREAD-STATE-DEVICE-FIR
+!RUN: bbc -emit-fir -fopenmp -fopenmp-assume-no-nested-parallelism -fopenmp-is-device -o - %s | FileCheck %s --check-prefix=NEST-PAR-DEVICE-FIR
+!RUN: bbc -emit-fir -fopenmp -fopenmp-target-debug=1 -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism -fopenmp-assume-threads-oversubscription -fopenmp-assume-no-thread-state -fopenmp-is-device -o - %s | FileCheck %s --check-prefix=ALL-DEVICE-FIR
+
+!DEFAULT-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags<>, omp.is_device = #omp.isdevice<is_device = true>{{.*}}}
+!DEFAULT-HOST-FIR: module attributes {{{.*}}, omp.is_device = #omp.isdevice<is_device = false>{{.*}}}
+!DEFAULT-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
+!DEFAULT-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
+!DEFAULT-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
+!DEFAULT-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
+!DEFAULT-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
+
+!DBG-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags<debug_kind = 1>{{.*}}}
+!DBG-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 1
+!DBG-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
+!DBG-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
+!DBG-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
+!DBG-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
+
+!DBG-EQ-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags<debug_kind = 111>{{.*}}}
+!DBG-EQ-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 111
+!DBG-EQ-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
+!DBG-EQ-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
+!DBG-EQ-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
+!DBG-EQ-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
+
+!TEAMS-OSUB-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags<assume_teams_oversubscription = true>{{.*}}}
+!TEAMS-OSUB-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
+!TEAMS-OSUB-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1
+!TEAMS-OSUB-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
+!TEAMS-OSUB-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
+!TEAMS-OSUB-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
+
+!THREAD-OSUB-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags<assume_threads_oversubscription = true>{{.*}}}
+!THREAD-OSUB-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
+!THREAD-OSUB-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
+!THREAD-OSUB-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 1
+!THREAD-OSUB-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
+!THREAD-OSUB-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
+
+!THREAD-STATE-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags<assume_no_thread_state = true>{{.*}}}
+!THREAD-STATE-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
+!THREAD-STATE-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
+!THREAD-STATE-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
+!THREAD-STATE-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1
+!THREAD-STATE-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
+
+!NEST-PAR-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags<assume_no_nested_parallelism = true>{{.*}}}
+!NEST-PAR-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
+!NEST-PAR-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
+!NEST-PAR-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
+!NEST-PAR-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
+!NEST-PAR-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 1
+
+!ALL-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags<debug_kind = 1, assume_teams_oversubscription = true, assume_threads_oversubscription = true, assume_no_thread_state = true, assume_no_nested_parallelism = true>{{.*}}}
+!ALL-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 1
+!ALL-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1
+!ALL-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 1
+!ALL-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1
+!ALL-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 1
+subroutine omp_subroutine()
+end subroutine omp_subroutine
Index: flang/test/Driver/omp-frontend-forwarding.f90
===================================================================
--- flang/test/Driver/omp-frontend-forwarding.f90
+++ flang/test/Driver/omp-frontend-forwarding.f90
@@ -20,3 +20,50 @@
! CHECK-OPENMP-EMBED-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" {{.*}}.f90"
! CHECK-OPENMP-EMBED: "{{[^"]*}}clang-offload-packager{{.*}} "--image=file={{.*}}.bc,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=openmp"
! CHECK-OPENMP-EMBED-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu" {{.*}} "-fopenmp" {{.*}} "-fembed-offload-object={{.*}}.out" {{.*}}.bc"
+
+! Test -fopenmp with offload for RTL Flag Options
+! RUN: %flang -### %s -o %t 2>&1 \
+! RUN: -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
+! RUN: -fopenmp-assume-threads-oversubscription \
+! RUN: | FileCheck %s --check-prefixes=CHECK-THREADS-OVS
+! CHECK-THREADS-OVS: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-assume-threads-oversubscription" {{.*}}.f90"
+
+! RUN: %flang -### %s -o %t 2>&1 \
+! RUN: -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
+! RUN: -fopenmp-assume-teams-oversubscription \
+! RUN: | FileCheck %s --check-prefixes=CHECK-TEAMS-OVS
+! CHECK-TEAMS-OVS: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-assume-teams-oversubscription" {{.*}}.f90"
+
+! RUN: %flang -### %s -o %t 2>&1 \
+! RUN: -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
+! RUN: -fopenmp-assume-no-nested-parallelism \
+! RUN: | FileCheck %s --check-prefixes=CHECK-NEST-PAR
+! CHECK-NEST-PAR: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-assume-no-nested-parallelism" {{.*}}.f90"
+
+! RUN: %flang -### %s -o %t 2>&1 \
+! RUN: -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
+! RUN: -fopenmp-assume-no-thread-state \
+! RUN: | FileCheck %s --check-prefixes=CHECK-THREAD-STATE
+! CHECK-THREAD-STATE: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-assume-no-thread-state" {{.*}}.f90"
+
+! RUN: %flang -### %s -o %t 2>&1 \
+! RUN: -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
+! RUN: -fopenmp-target-debug \
+! RUN: | FileCheck %s --check-prefixes=CHECK-TARGET-DEBUG
+! CHECK-TARGET-DEBUG: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-target-debug" {{.*}}.f90"
+
+! RUN: %flang -### %s -o %t 2>&1 \
+! RUN: -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
+! RUN: -fopenmp-target-debug \
+! RUN: | FileCheck %s --check-prefixes=CHECK-TARGET-DEBUG
+! CHECK-TARGET-DEBUG-EQ: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-target-debug=111" {{.*}}.f90"
+
+! RUN: %flang -S -### %s -o %t 2>&1 \
+! RUN: -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
+! RUN: -fopenmp-target-debug -fopenmp-assume-threads-oversubscription \
+! RUN: -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism \
+! RUN: -fopenmp-assume-no-thread-state \
+! RUN: | FileCheck %s --check-prefixes=CHECK-RTL-ALL
+! CHECK-RTL-ALL: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-target-debug" "-fopenmp-assume-teams-oversubscription"
+! CHECK-RTL-ALL: "-fopenmp-assume-threads-oversubscription" "-fopenmp-assume-no-thread-state" "-fopenmp-assume-no-nested-parallelism"
+! CHECK-RTL-ALL: {{.*}}.f90"
Index: flang/test/Driver/driver-help.f90
===================================================================
--- flang/test/Driver/driver-help.f90
+++ flang/test/Driver/driver-help.f90
@@ -145,6 +145,7 @@
! HELP-FC1-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
! HELP-FC1-NEXT: -fopenacc Enable OpenACC
! HELP-FC1-NEXT: -fopenmp-is-device Generate code only for an OpenMP target device.
+! HELP-FC1-NEXT: -fopenmp-target-debug Enable debugging in the OpenMP offloading device RTL
! HELP-FC1-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code.
! HELP-FC1-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
! HELP-FC1-NEXT: -freciprocal-math Allow division operations to be reassociated
Index: flang/lib/Frontend/FrontendActions.cpp
===================================================================
--- flang/lib/Frontend/FrontendActions.cpp
+++ flang/lib/Frontend/FrontendActions.cpp
@@ -278,8 +278,8 @@
if (ci.getInvocation().getFrontendOpts().features.IsEnabled(
Fortran::common::LanguageFeature::OpenMP)) {
- setOffloadModuleInterfaceAttributes(
- *mlirModule, ci.getInvocation().getLangOpts().OpenMPIsDevice);
+ setOffloadModuleInterfaceAttributes(*mlirModule,
+ ci.getInvocation().getLangOpts());
}
if (!setUpTargetMachine())
Index: flang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- flang/lib/Frontend/CompilerInvocation.cpp
+++ flang/lib/Frontend/CompilerInvocation.cpp
@@ -721,6 +721,39 @@
if (args.hasArg(clang::driver::options::OPT_fopenmp_is_device)) {
res.getLangOpts().OpenMPIsDevice = 1;
+
+ if (args.hasFlag(
+ clang::driver::options::OPT_fopenmp_assume_teams_oversubscription,
+ clang::driver::options::
+ OPT_fno_openmp_assume_teams_oversubscription,
+ /*Default=*/false))
+ res.getLangOpts().OpenMPTeamSubscription = true;
+
+ if (args.hasArg(
+ clang::driver::options::OPT_fopenmp_assume_no_thread_state))
+ res.getLangOpts().OpenMPNoThreadState = 1;
+
+ if (args.hasArg(
+ clang::driver::options::OPT_fopenmp_assume_no_nested_parallelism))
+ res.getLangOpts().OpenMPNoNestedParallelism = 1;
+
+ if (args.hasFlag(clang::driver::options::
+ OPT_fopenmp_assume_threads_oversubscription,
+ clang::driver::options::
+ OPT_fno_openmp_assume_threads_oversubscription,
+ /*Default=*/false))
+ res.getLangOpts().OpenMPThreadSubscription = true;
+
+ if ((args.hasArg(clang::driver::options::OPT_fopenmp_target_debug) ||
+ args.hasArg(clang::driver::options::OPT_fopenmp_target_debug_EQ))) {
+ res.getLangOpts().OpenMPTargetDebug = getLastArgIntValue(
+ args, clang::driver::options::OPT_fopenmp_target_debug_EQ,
+ res.getLangOpts().OpenMPTargetDebug, diags);
+
+ if (!res.getLangOpts().OpenMPTargetDebug &&
+ args.hasArg(clang::driver::options::OPT_fopenmp_target_debug))
+ res.getLangOpts().OpenMPTargetDebug = 1;
+ }
}
}
Index: flang/include/flang/Tools/CrossToolHelpers.h
===================================================================
--- flang/include/flang/Tools/CrossToolHelpers.h
+++ flang/include/flang/Tools/CrossToolHelpers.h
@@ -13,17 +13,53 @@
#ifndef FORTRAN_TOOLS_CROSS_TOOL_HELPERS_H
#define FORTRAN_TOOLS_CROSS_TOOL_HELPERS_H
+#include "flang/Frontend/LangOptions.h"
+#include <cstdint>
+
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/BuiltinOps.h"
+struct OffloadModuleOpts {
+ OffloadModuleOpts() {}
+ OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription,
+ bool OpenMPThreadSubscription, bool OpenMPNoThreadState,
+ bool OpenMPNoNestedParallelism, bool OpenMPIsDevice)
+ : OpenMPTargetDebug(OpenMPTargetDebug),
+ OpenMPTeamSubscription(OpenMPTeamSubscription),
+ OpenMPThreadSubscription(OpenMPThreadSubscription),
+ OpenMPNoThreadState(OpenMPNoThreadState),
+ OpenMPNoNestedParallelism(OpenMPNoNestedParallelism),
+ OpenMPIsDevice(OpenMPIsDevice) {}
+
+ OffloadModuleOpts(Fortran::frontend::LangOptions &Opts)
+ : OpenMPTargetDebug(Opts.OpenMPTargetDebug),
+ OpenMPTeamSubscription(Opts.OpenMPTeamSubscription),
+ OpenMPThreadSubscription(Opts.OpenMPThreadSubscription),
+ OpenMPNoThreadState(Opts.OpenMPNoThreadState),
+ OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism),
+ OpenMPIsDevice(Opts.OpenMPIsDevice) {}
+
+ uint32_t OpenMPTargetDebug = 0;
+ bool OpenMPTeamSubscription = false;
+ bool OpenMPThreadSubscription = false;
+ bool OpenMPNoThreadState = false;
+ bool OpenMPNoNestedParallelism = false;
+ bool OpenMPIsDevice = false;
+};
+
// Shares assinging of the OpenMP OffloadModuleInterface and its assorted
// attributes accross Flang tools (bbc/flang)
void setOffloadModuleInterfaceAttributes(
- mlir::ModuleOp &module, bool isDevice) {
+ mlir::ModuleOp &module, OffloadModuleOpts Opts) {
// Should be registered by the OpenMPDialect
if (auto offloadMod = llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(
module.getOperation())) {
- offloadMod.setIsDevice(isDevice);
+ offloadMod.setIsDevice(Opts.OpenMPIsDevice);
+ if (Opts.OpenMPIsDevice) {
+ offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription,
+ Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState,
+ Opts.OpenMPNoNestedParallelism);
+ }
}
}
Index: flang/include/flang/Frontend/LangOptions.def
===================================================================
--- flang/include/flang/Frontend/LangOptions.def
+++ flang/include/flang/Frontend/LangOptions.def
@@ -36,6 +36,18 @@
LANGOPT(ReciprocalMath, 1, false)
/// Generate code only for OpenMP target device
LANGOPT(OpenMPIsDevice, 1, false)
+/// Enable debugging in the OpenMP offloading device RTL
+LANGOPT(OpenMPTargetDebug, 32, 0)
+/// Assume work-shared loops do not have more iterations than participating
+/// threads.
+LANGOPT(OpenMPThreadSubscription, 1, 0)
+/// Assume distributed loops do not have more iterations than participating
+/// teams.
+LANGOPT(OpenMPTeamSubscription, 1, 0)
+/// Assume that no thread in a parallel region will modify an ICV.
+LANGOPT(OpenMPNoThreadState, 1, 0)
+/// Assume that no thread in a parallel region will encounter a parallel region
+LANGOPT(OpenMPNoNestedParallelism, 1, 0)
#undef LANGOPT
#undef ENUM_LANGOPT
Index: clang/lib/Driver/ToolChains/Flang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Flang.cpp
+++ clang/lib/Driver/ToolChains/Flang.cpp
@@ -157,6 +157,27 @@
// generating code for a device, so that only the relevant code is
// emitted.
CmdArgs.push_back("-fopenmp-is-device");
+
+ // When in OpenMP offloading mode, enable debugging on the device.
+ Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_target_debug_EQ);
+ if (Args.hasFlag(options::OPT_fopenmp_target_debug,
+ options::OPT_fno_openmp_target_debug, /*Default=*/false))
+ CmdArgs.push_back("-fopenmp-target-debug");
+
+ // When in OpenMP offloading mode, forward assumptions information about
+ // thread and team counts in the device.
+ if (Args.hasFlag(options::OPT_fopenmp_assume_teams_oversubscription,
+ options::OPT_fno_openmp_assume_teams_oversubscription,
+ /*Default=*/false))
+ CmdArgs.push_back("-fopenmp-assume-teams-oversubscription");
+ if (Args.hasFlag(options::OPT_fopenmp_assume_threads_oversubscription,
+ options::OPT_fno_openmp_assume_threads_oversubscription,
+ /*Default=*/false))
+ CmdArgs.push_back("-fopenmp-assume-threads-oversubscription");
+ if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state))
+ CmdArgs.push_back("-fopenmp-assume-no-thread-state");
+ if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism))
+ CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
}
}
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2680,26 +2680,39 @@
Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>,
Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
-def fopenmp_target_debug : Flag<["-"], "fopenmp-target-debug">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>,
+
+//===----------------------------------------------------------------------===//
+// Shared cc1 + fc1 OpenMP Target Options
+//===----------------------------------------------------------------------===//
+
+let Flags = [CC1Option, FC1Option, NoArgumentUnused] in {
+let Group = f_Group in {
+
+def fopenmp_target_debug : Flag<["-"], "fopenmp-target-debug">,
HelpText<"Enable debugging in the OpenMP offloading device RTL">;
-def fno_openmp_target_debug : Flag<["-"], "fno-openmp-target-debug">, Group<f_Group>, Flags<[NoArgumentUnused]>;
-def fopenmp_target_debug_EQ : Joined<["-"], "fopenmp-target-debug=">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
-def fopenmp_assume_teams_oversubscription : Flag<["-"], "fopenmp-assume-teams-oversubscription">,
- Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
-def fopenmp_assume_threads_oversubscription : Flag<["-"], "fopenmp-assume-threads-oversubscription">,
- Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
-def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-teams-oversubscription">,
- Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
-def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">,
- Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
-def fopenmp_assume_no_thread_state : Flag<["-"], "fopenmp-assume-no-thread-state">, Group<f_Group>,
- Flags<[CC1Option, NoArgumentUnused, HelpHidden]>,
+def fno_openmp_target_debug : Flag<["-"], "fno-openmp-target-debug">;
+
+} // let Group = f_Group
+} // let Flags = [CC1Option, FC1Option, NoArgumentUnused]
+
+let Flags = [CC1Option, FC1Option, NoArgumentUnused, HelpHidden] in {
+let Group = f_Group in {
+
+def fopenmp_target_debug_EQ : Joined<["-"], "fopenmp-target-debug=">;
+def fopenmp_assume_teams_oversubscription : Flag<["-"], "fopenmp-assume-teams-oversubscription">;
+def fopenmp_assume_threads_oversubscription : Flag<["-"], "fopenmp-assume-threads-oversubscription">;
+def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-teams-oversubscription">;
+def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">;
+def fopenmp_assume_no_thread_state : Flag<["-"], "fopenmp-assume-no-thread-state">,
HelpText<"Assert no thread in a parallel region modifies an ICV">,
MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>;
-def fopenmp_assume_no_nested_parallelism : Flag<["-"], "fopenmp-assume-no-nested-parallelism">, Group<f_Group>,
- Flags<[CC1Option, NoArgumentUnused, HelpHidden]>,
+def fopenmp_assume_no_nested_parallelism : Flag<["-"], "fopenmp-assume-no-nested-parallelism">,
HelpText<"Assert no nested parallel regions in the GPU">,
MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>;
+
+} // let Group = f_Group
+} // let Flags = [CC1Option, FC1Option, NoArgumentUnused, HelpHidden]
+
def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<f_Group>,
Flags<[CC1Option, NoArgumentUnused]>,
HelpText<"Do not create a host fallback if offloading to the device fails.">,
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits