jdoerfert created this revision.
jdoerfert added reviewers: jhuber6, fghanim, JonChesterfield, grokos, 
AndreyChurbanov, ye-luo, tianshilei1992, ggeorgakoudis.
Herald added subscribers: llvm-commits, cfe-commits, sstefan1, guansong, bollu, 
hiraditya, yaxunl.
Herald added projects: clang, LLVM.

The optimization introduced in D83271 <https://reviews.llvm.org/D83271> will 
verify a safety and a
precautionary condition. Until we find a proper solution for the former
we allow users to disable the check. If they do it and violate the
problematic (but probably unlikely) case, they will see UB, most likely
a crash when a non-function pointer is called. The flag has the word
`unsafe` in the name and the remarks are adjusted to provide some
insight what is happening.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D83832

Files:
  clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
  llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Index: llvm/lib/Transforms/IPO/OpenMPOpt.cpp
===================================================================
--- llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -36,6 +36,12 @@
     "openmp-opt-disable", cl::ZeroOrMore,
     cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
     cl::init(false));
+static cl::opt<bool> UnsafeAssumeNoExternalTargetRegions(
+    "openmp-unsafe-assume-no-external-target-regions", cl::ZeroOrMore,
+    cl::desc("[UNSAFE!] Assume no external target regions when specializing "
+             "OpenMP parallel regions to avoid spurious register usage issues, "
+             "combine with `-Rpass=openmp`"),
+    cl::Hidden, cl::init(false));
 
 static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
                                     cl::Hidden);
@@ -1096,15 +1102,23 @@
     if (!K) {
       {
         auto Remark = [&](OptimizationRemark OR) {
-          return OR << "Parallel region is not known to be called from a "
+          return OR << (UnsafeAssumeNoExternalTargetRegions ? "[UNSAFE] " : "")
+                    << "Parallel region is not known to be called from a "
                        "unique single target region, maybe the surrounding "
-                       "function has external linkage?; will not attempt to "
-                       "rewrite the state machine use.";
+                       "function has external linkage?"
+                    << (UnsafeAssumeNoExternalTargetRegions
+                            ? "; will rewrite the state machine use due to "
+                              "command line flag, this can lead to undefined "
+                              "behavior if the parallel region is called from "
+                              "a target region outside this translation unit."
+                            : "; will not attempt to rewrite the state machine "
+                              "use.");
         };
         emitRemarkOnFunction(F, "OpenMPParallelRegionInMultipleKernesl",
                              Remark);
       }
-      continue;
+      if (!UnsafeAssumeNoExternalTargetRegions)
+        continue;
     }
 
     // We now know F is a parallel body function called only from the kernel K.
@@ -1120,10 +1134,12 @@
                      "(parallel region ID: "
                   << ore::NV("OpenMPParallelRegion", F->getName())
                   << ", kernel ID: "
-                  << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
+                  << ore::NV("OpenMPTargetRegion", K ? K->getName() : "<NONE>")
+                  << ")";
       };
       emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD",
                            RemarkParalleRegion);
+
       auto RemarkKernel = [&](OptimizationRemark OR) {
         return OR << "Target region containing the parallel region that is "
                      "specialized. (parallel region ID: "
@@ -1131,7 +1147,8 @@
                   << ", kernel ID: "
                   << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
       };
-      emitRemarkOnFunction(K, "OpenMPParallelRegionInNonSPMD", RemarkKernel);
+      if (K)
+        emitRemarkOnFunction(K, "OpenMPParallelRegionInNonSPMD", RemarkKernel);
     }
 
     Module &M = *F->getParent();
Index: clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
===================================================================
--- clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
+++ clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
@@ -1,31 +1,35 @@
-// RUN: %clang_cc1                                 -verify=host -Rpass=openmp -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1                                 -verify      -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
-// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify      -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
+// RUN: %clang_cc1                                 -verify=host                                                              -Rpass=openmp -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1                                 -verify=all,safe                                                          -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
+// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify=all,safe                                                          -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
+// RUN: %clang_cc1                                 -verify=all,force -mllvm -openmp-unsafe-assume-no-external-target-regions -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
+// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify=all,force -mllvm -openmp-unsafe-assume-no-external-target-regions -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
 
 // host-no-diagnostics
 
 void bar(void) {
 #pragma omp parallel // #1                                                                                                                                                                                                                                                                                                                                           \
-                     // expected-remark@#1 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \
-                     // expected-remark@#1 {{Parallel region is not known to be called from a unique single target region, maybe the surrounding function has external linkage?; will not attempt to rewrite the state machine use.}}
+                     // all-remark@#1 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \
+                     // safe-remark@#1 {{Parallel region is not known to be called from a unique single target region, maybe the surrounding function has external linkage?; will not attempt to rewrite the state machine use.}}         \
+                     // force-remark@#1 {{[UNSAFE] Parallel region is not known to be called from a unique single target region, maybe the surrounding function has external linkage?; will rewrite the state machine use due to command line flag, this can lead to undefined behavior if the parallel region is called from a target region outside this translation unit.}} \
+                     // force-remark@#1 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__2_wrapper, kernel ID: <NONE>}}
   {
   }
 }
 
 void foo(void) {
 #pragma omp target teams // #2                                                                                                                                                                      \
-                         // expected-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__1_wrapper, kernel ID: __omp_offloading_22}} \
-                         // expected-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__3_wrapper, kernel ID: __omp_offloading_22}}
+                         // all-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__1_wrapper, kernel ID: __omp_offloading_22}} \
+                         // all-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__3_wrapper, kernel ID: __omp_offloading_22}}
   {
 #pragma omp parallel // #3                                                                                                                                                                                                                                                                                                                                           \
-                     // expected-remark@#3 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \
-                     // expected-remark@#3 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__1_wrapper, kernel ID: __omp_offloading_22}}
+                     // all-remark@#3 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \
+                     // all-remark@#3 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__1_wrapper, kernel ID: __omp_offloading_22}}
     {
     }
     bar();
 #pragma omp parallel // #4                                                                                                                                                                                                                                                                                                                                           \
-                     // expected-remark@#4 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \
-                     // expected-remark@#4 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__3_wrapper, kernel ID: __omp_offloading_22}}
+                     // all-remark@#4 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \
+                     // all-remark@#4 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__3_wrapper, kernel ID: __omp_offloading_22}}
     {
     }
   }
@@ -43,5 +47,5 @@
   }
 }
 
-// expected-remark@* {{OpenMP runtime call __kmpc_global_thread_num moved to}}
-// expected-remark@* {{OpenMP runtime call __kmpc_global_thread_num deduplicated}}
+// all-remark@* {{OpenMP runtime call __kmpc_global_thread_num moved to}}
+// all-remark@* {{OpenMP runtime call __kmpc_global_thread_num deduplicated}}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to