huntergr created this revision.
Herald added a subscriber: rengolin.

Adds a new flag ('-fopenmp-simd') to clang which enables processing
of 'simd' and 'declare simd' pragmas without supporting the rest
of OpenMP.

The pragma handler will filter out directives and clauses which
aren't related to simd, and the driver will not add lib(g)omp to
the list of libraries to link.

Documentation updated to describe the new flag.


https://reviews.llvm.org/D31417

Files:
  docs/ClangCommandLineReference.rst
  docs/UsersManual.rst
  include/clang/Basic/LangOptions.def
  include/clang/Driver/Options.td
  lib/CodeGen/CodeGenModule.cpp
  lib/Driver/ToolChains/Clang.cpp
  lib/Frontend/CompilerInvocation.cpp
  lib/Parse/ParsePragma.cpp
  lib/Sema/SemaExpr.cpp
  lib/Sema/SemaOpenMP.cpp
  test/OpenMP/linking.c
  test/OpenMP/simd_only.c

Index: test/OpenMP/simd_only.c
===================================================================
--- /dev/null
+++ test/OpenMP/simd_only.c
@@ -0,0 +1,157 @@
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c -triple aarch64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+// CHECK-LABEL: @simd_plain
+// CHECK-LABEL: omp.inner.for.body:
+// CHECK: load float, float* %arrayidx{{.*}} !llvm.mem.parallel_loop_access
+// CHECK: load float, float* %arrayidx{{.*}} !llvm.mem.parallel_loop_access
+// CHECK: store float %{{.*}}, float* %arrayidx{{.*}} !llvm.mem.parallel_loop_access
+// CHECK: ret void
+void simd_plain(float *a, float *b, float *c, int N) {
+  #pragma omp simd
+  for (int i = 0; i < N; i += 2)
+    a[i] = b[i] * c[i];
+}
+
+// CHECK-LABEL: @simd_safelen_clause
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-LABEL: omp.inner.for.inc:
+// CHECK: br label %omp.inner.for.cond, !llvm.loop
+// CHECK: ret void
+void simd_safelen_clause(float *a, float *b, float *c, int N) {
+  #pragma omp simd safelen(4)
+  for (int i = 0; i < N; i += 2)
+    a[i] = b[i] * c[i];
+}
+
+extern long long initial_val();
+
+// CHECK-LABEL: @simd_simdlen_and_linear_clause
+// CHECK: omp.inner.for.body:
+// CHECK: !llvm.mem.parallel_loop_access
+// CHECK: ret void
+void simd_simdlen_and_linear_clause(float *a, float *b, float *c, int N) {
+  long long lv = initial_val();
+  #pragma omp simd simdlen(2) linear(lv: 4)
+  for (int i = 0; i < N; ++i) {
+    a[lv] = b[lv] * c[lv];
+    lv += 4;
+  }
+}
+
+extern float gfloat;
+
+// CHECK-LABEL: @simd_aligned_and_private_clause
+// CHECK-LABEL: entry:
+// CHECK: %gfloat = alloca float, align 4
+// CHECK: store float 1.000000e+00, float* @gfloat, align 4
+// CHECK-LABEL: omp.inner.for.body:
+// CHECK-NOT: @gfloat
+// CHECK: load{{.*}}!llvm.mem.parallel_loop_access
+// CHECK: store float {{.*}}, float* %gfloat, align 4, !llvm.mem.parallel_loop_access
+// CHECK: %[[FADD:add[0-9]+]] = fadd float %{{[0-9]+}}, 2.000000e+00
+// CHECK: store float %[[FADD]], float* {{.*}}, align 4, !llvm.mem.parallel_loop_access
+// CHECK: ret void
+void simd_aligned_and_private_clause(float *a, float *b, float *c, int N) {
+  gfloat = 1.0f;
+  #pragma omp simd aligned(a:4) private(gfloat)
+  for (int i = 0; i < N; i += 2) {
+    gfloat = b[i] * c[i];
+    a[i] = gfloat + 2.0f;
+  }
+}
+
+// CHECK-LABEL: @simd_lastprivate_and_reduction_clause
+// CHECK-LABEL: entry:
+// CHECK: %[[SUMVAR:sum[0-9]+]] = alloca float, align 4
+// CHECK: store float 0.000000e+00, float* %[[SUMVAR]], align 4
+// CHECK-LABEL: omp.inner.for.body
+// CHECK: %[[LOAD:[0-9]+]] = load float, float* %[[SUMVAR]], align 4, !llvm.mem.parallel_loop_access
+// CHECK: %[[FADD:add[0-9]+]] = fadd float %[[LOAD]], %mul{{.*}}
+// CHECK: store float %[[FADD]], float* %[[SUMVAR]], align 4, !llvm.mem.parallel_loop_access
+// CHECK: store i32{{.*}}, i32* %[[IDXVAR:idx[0-9]+]]
+// CHECK-LABEL: omp.inner.for.end:
+// CHECK-DAG: %[[TMP1:[0-9]+]] = load i32, i32* %[[IDXVAR]], align 4
+// CHECK-DAG: store i32 %[[TMP1]], i32* %idx, align 4
+// CHECK-DAG: %[[INITVAL:[0-9]+]] = load float, float* %sum, align 4
+// CHECK-DAG: %[[TMP2:[0-9]+]] = load float, float* %[[SUMVAR]], align 4
+// CHECK-DAG: %[[SUMMED:add[0-9]+]] = fadd float %[[INITVAL]], %[[TMP2]]
+// CHECK-DAG: store float %[[SUMMED]], float* %sum, align 4
+// CHECK-LABEL: simd.if.end:
+// CHECK: %[[OUTVAL:[0-9]+]] = load float, float* %sum, align 4
+// CHECK: %[[OUTADDR:[0-9]+]] = load float*, float** %a.addr, align 8
+// CHECK: store float %[[OUTVAL]], float* %[[OUTADDR]], align 4
+// CHECK: %[[RETIDX:[0-9]+]] = load i32, i32* %idx, align 4
+// CHECK: ret i32 %[[RETIDX]]
+int simd_lastprivate_and_reduction_clause(float *a, float *b, float *c, int N) {
+  float sum = 0.0f;
+  int idx;
+  #pragma omp simd lastprivate(idx) reduction(+:sum)
+  for (int i = 0; i < N; ++i) {
+    sum += b[i] * c[i];
+    idx = i * 2;
+  }
+
+  *a = sum;
+  return idx;
+}
+
+// CHECK-LABEL: @simd_collapse_clause
+// CHECK: omp.inner.for.body:
+// CHECK-NOT: for.body:
+// CHECK: ret void
+void simd_collapse_clause(float **a, float **b, float **c, int N, int M) {
+  #pragma omp simd collapse(2)
+  for (int i = 0; i < N; ++i)
+    for (int j = 0; j < M; ++j) // inner dimension is bounded by M, not N
+      a[i][j] = b[i][j] * c[i][j];
+}
+
+// Negative tests; no simd directive, so should be normal code.
+
+// CHECK-LABEL: @parallel_for
+// CHECK-NOT: call void {{.*}} @__kmpc_fork_call
+// CHECK-NOT: @.omp_outlined.
+// CHECK-NOT: omp.inner.for.body:
+// CHECK: ret void
+void parallel_for(float *a, float *b, float *c, int N) {
+  #pragma omp parallel for
+  for (int i = 0; i < N; ++i)
+    a[i] = b[i] * c[i];
+}
+
+extern void long_running_func(int);
+
+// CHECK-LABEL: @taskloop
+// CHECK-NOT: call i8* @__kmpc_omp_task_alloc
+// CHECK-NOT: call void @__kmpc_taskloop
+// CHECK: ret void
+void taskloop(int N) {
+  #pragma omp taskloop
+  for (int i = 0; i < N; ++i)
+    long_running_func(i);
+}
+
+// Combined constructs; simd part should work, rest should be ignored.
+
+// CHECK-LABEL: @parallel_for_simd
+// CHECK-NOT: call void {{.*}} @__kmpc_fork_call
+// CHECK-NOT: @.omp_outlined.
+// CHECK: omp.inner.for.body:
+// CHECK: ret void
+void parallel_for_simd(float *a, float *b, float *c, int N) {
+#pragma omp parallel for simd num_threads(2) simdlen(4)
+  for (int i = 0; i < N; ++i)
+    a[i] = b[i] * c[i];
+}
+
+// Make sure there are no declarations of libomp runtime functions
+// CHECK-NOT: declare void @__kmpc
+
+// CHECK-LABEL: !llvm.ident = !{!0}
+
+// simd_safelen_clause width md
+// CHECK-DAG: !{{[0-9]+}} = !{!"llvm.loop.vectorize.width", i32 4}
+// simd_simdlen_and_linear_clause width md
+// CHECK-DAG: !{{[0-9]+}} = !{!"llvm.loop.vectorize.width", i32 2}
Index: test/OpenMP/linking.c
===================================================================
--- test/OpenMP/linking.c
+++ test/OpenMP/linking.c
@@ -89,3 +89,14 @@
 // CHECK-MSVC-ILINK-64-SAME: -libpath:{{.+}}/../lib
 // CHECK-MSVC-ILINK-64-SAME: -defaultlib:libiomp5md.lib
 //
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN:     -fopenmp-simd -target aarch64-linux-gnu \
+// RUN:   | FileCheck --check-prefix=CHECK-SIMD-ONLY-AA64 %s
+// CHECK-SIMD-ONLY-AA64-NOT: "-l{{omp|gomp|iomp5}}"
+// CHECK-SIMD-ONLY-AA64-NOT: "-lpthread"
+//
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN:     -fopenmp-simd -target x86_64-unknown-linux \
+// RUN:   | FileCheck --check-prefix=CHECK-SIMD-ONLY-X64 %s
+// CHECK-SIMD-ONLY-X64-NOT: "-l{{omp|gomp|iomp5}}"
+// CHECK-SIMD-ONLY-X64-NOT: "-lpthread"
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -983,7 +983,7 @@
 }
 
 VarDecl *Sema::IsOpenMPCapturedDecl(ValueDecl *D) {
-  assert(LangOpts.OpenMP && "OpenMP is not allowed");
+  assert((LangOpts.OpenMP || LangOpts.OpenMPSimd) && "OpenMP is not allowed");
   D = getCanonicalDecl(D);
 
   // If we are attempting to capture a global variable in a directive with
@@ -1029,7 +1029,7 @@
 }
 
 bool Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level) {
-  assert(LangOpts.OpenMP && "OpenMP is not allowed");
+  assert((LangOpts.OpenMP || LangOpts.OpenMPSimd) && "OpenMP is not allowed");
   return DSAStack->hasExplicitDSA(
       D, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; }, Level);
 }
Index: lib/Sema/SemaExpr.cpp
===================================================================
--- lib/Sema/SemaExpr.cpp
+++ lib/Sema/SemaExpr.cpp
@@ -13953,7 +13953,8 @@
   // Capture global variables if it is required to use private copy of this
   // variable.
   bool IsGlobal = !Var->hasLocalStorage();
-  if (IsGlobal && !(LangOpts.OpenMP && IsOpenMPCapturedDecl(Var)))
+  if (IsGlobal && !((LangOpts.OpenMP || LangOpts.OpenMPSimd) &&
+                    IsOpenMPCapturedDecl(Var)))
     return true;
 
   // Walk up the stack to determine whether we can capture the variable,
Index: lib/Parse/ParsePragma.cpp
===================================================================
--- lib/Parse/ParsePragma.cpp
+++ lib/Parse/ParsePragma.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/AST/ASTContext.h"
+#include "clang/Basic/OpenMPKinds.h"
 #include "clang/Basic/PragmaKinds.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Lex/Preprocessor.h"
@@ -98,6 +99,12 @@
                     Token &FirstToken) override;
 };
 
+struct PragmaOpenMPSIMDHandler : public PragmaHandler {
+  PragmaOpenMPSIMDHandler() : PragmaHandler("omp") { } // '#pragma omp ...'
+  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
+                    Token &FirstToken) override;
+};
+
 /// PragmaCommentHandler - "\#pragma comment ...".
 struct PragmaCommentHandler : public PragmaHandler {
   PragmaCommentHandler(Sema &Actions)
@@ -215,6 +222,8 @@
   }
   if (getLangOpts().OpenMP)
     OpenMPHandler.reset(new PragmaOpenMPHandler());
+  else if (getLangOpts().OpenMPSimd)
+    OpenMPHandler.reset(new PragmaOpenMPSIMDHandler());
   else
     OpenMPHandler.reset(new PragmaNoOpenMPHandler());
   PP.AddPragmaHandler(OpenMPHandler.get());
@@ -1548,6 +1557,111 @@
                       /*DisableMacroExpansion=*/false);
 }
 
+/// \brief Handle '#pragma omp ...' when only OpenMP simd is enabled: keep the
+/// 'simd' / 'declare simd' parts of the pragma and drop everything else.
+void
+PragmaOpenMPSIMDHandler::HandlePragma(Preprocessor &PP,
+                                      PragmaIntroducerKind Introducer,
+                                      Token &FirstTok) {
+  SmallVector<Token, 16> Pragma;
+  Token Tok;
+  Tok.startToken();
+  Tok.setKind(tok::annot_pragma_openmp);
+  Tok.setLocation(FirstTok.getLocation());
+  Pragma.push_back(Tok);
+  PP.Lex(Tok);
+  auto Text = PP.getSpelling(Tok);
+
+  bool isSimd = false;
+  bool isDeclare = false;
+  // Check for a declare before everything else, since that isn't recognized
+  // as a directive on its own.
+  if (Text == "declare") {
+    Pragma.push_back(Tok);
+    isDeclare = true;
+    PP.Lex(Tok);
+    Text = PP.getSpelling(Tok);
+  }
+
+  // Discard directives that aren't related to simd.
+  while (Tok.isNot(tok::eod) && getOpenMPDirectiveKind(Text) != OMPD_unknown) {
+    if (Text == "simd") {
+      Pragma.push_back(Tok);
+      isSimd = true;
+    }
+
+    PP.Lex(Tok);
+    Text = PP.getSpelling(Tok);
+  }
+
+  // If we didn't encounter a simd directive, discard the whole pragma and warn
+  // about it (if enabled).
+  if (!isSimd) {
+    if (!PP.getDiagnostics().isIgnored(diag::warn_pragma_omp_ignored,
+                                       FirstTok.getLocation())) {
+      PP.Diag(FirstTok, diag::warn_pragma_omp_ignored);
+      PP.getDiagnostics().setSeverity(diag::warn_pragma_omp_ignored,
+                                      diag::Severity::Ignored,
+                                      SourceLocation());
+    }
+
+    if (Tok.isNot(tok::eod))
+      PP.DiscardUntilEndOfDirective();
+    return;
+  }
+
+  auto DirectiveKind = isDeclare ? OMPD_declare_simd : OMPD_simd;
+
+  // Read through any clauses, only save those which apply to simd or
+  // declare simd directives
+  while (Tok.isNot(tok::eod)) {
+    Text = PP.getSpelling(Tok);
+    bool Allowed = isAllowedClauseForDirective(DirectiveKind,
+                                               getOpenMPClauseKind(Text));
+
+    if (Allowed)
+      Pragma.push_back(Tok);
+
+    PP.Lex(Tok);
+
+    // For clauses with arguments, we need to process everything up to the
+    // parenthesis matching the opening one. Either add to the current pragma
+    // or discard if it's not supported for the current directive kind.
+    if (Tok.is(tok::l_paren)) {
+      // Track nesting so that arguments containing their own parentheses,
+      // e.g. 'safelen((4))', are consumed as a whole and not cut off at the
+      // first ')'.
+      unsigned ParenDepth = 0;
+      // Make sure we bail out of processing clauses if we hit the end of the
+      // directive before finishing the arguments. Let the full openmp parser
+      // deal with reporting errors for malformed pragmas.
+      while (Tok.isNot(tok::eod)) {
+        if (Tok.is(tok::l_paren))
+          ++ParenDepth;
+        else if (Tok.is(tok::r_paren))
+          --ParenDepth;
+        // Keeps the closing ')' too, unless the clause is thrown away.
+        if (Allowed)
+          Pragma.push_back(Tok);
+        PP.Lex(Tok);
+        if (ParenDepth == 0)
+          break;
+      }
+    }
+  }
+
+  SourceLocation EodLoc = Tok.getLocation();
+  Tok.startToken();
+  Tok.setKind(tok::annot_pragma_openmp_end);
+  Tok.setLocation(EodLoc);
+  Pragma.push_back(Tok);
+
+  auto Toks = llvm::make_unique<Token[]>(Pragma.size());
+  std::copy(Pragma.begin(), Pragma.end(), Toks.get());
+  PP.EnterTokenStream(std::move(Toks), Pragma.size(),
+                      /*DisableMacroExpansion=*/false);
+}
+
 /// \brief Handle '#pragma pointers_to_members'
 // The grammar for this pragma is as follows:
 //
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -2208,6 +2208,7 @@
       Opts.OpenMP && !Args.hasArg(options::OPT_fnoopenmp_use_tls);
   Opts.OpenMPIsDevice =
       Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_is_device);
+  Opts.OpenMPSimd = !Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_simd);
 
   if (Opts.OpenMP) {
     int Version =
Index: lib/Driver/ToolChains/Clang.cpp
===================================================================
--- lib/Driver/ToolChains/Clang.cpp
+++ lib/Driver/ToolChains/Clang.cpp
@@ -3232,7 +3232,9 @@
       // semantic analysis, etc.
       break;
     }
-  }
+  } else if (Args.hasFlag(options::OPT_fopenmp_simd,
+                          options::OPT_fno_openmp_simd, /*Default=*/false))
+    CmdArgs.push_back("-fopenmp-simd");
 
   const SanitizerArgs &Sanitize = getToolChain().getSanitizerArgs();
   Sanitize.addArgs(getToolChain(), Args, CmdArgs, InputType);
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -119,7 +119,7 @@
     createObjCRuntime();
   if (LangOpts.OpenCL)
     createOpenCLRuntime();
-  if (LangOpts.OpenMP)
+  if (LangOpts.OpenMP || LangOpts.OpenMPSimd)
     createOpenMPRuntime();
   if (LangOpts.CUDA)
     createCUDARuntime();
Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -1252,6 +1252,8 @@
 def fomit_frame_pointer : Flag<["-"], "fomit-frame-pointer">, Group<f_Group>;
 def fopenmp : Flag<["-"], "fopenmp">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
 def fno_openmp : Flag<["-"], "fno-openmp">, Group<f_Group>, Flags<[NoArgumentUnused]>;
+def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
+def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group<f_Group>, Flags<[NoArgumentUnused]>;
 def fopenmp_version_EQ : Joined<["-"], "fopenmp-version=">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
 def fopenmp_EQ : Joined<["-"], "fopenmp=">, Group<f_Group>;
 def fopenmp_use_tls : Flag<["-"], "fopenmp-use-tls">, Group<f_Group>, Flags<[NoArgumentUnused]>;
Index: include/clang/Basic/LangOptions.def
===================================================================
--- include/clang/Basic/LangOptions.def
+++ include/clang/Basic/LangOptions.def
@@ -187,6 +187,7 @@
 LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")
 LANGOPT(CUDA              , 1, 0, "CUDA")
 LANGOPT(OpenMP            , 32, 0, "OpenMP support and version of OpenMP (31, 40 or 45)")
+LANGOPT(OpenMPSimd        , 1, 0, "OpenMP support for simd and declare simd directives only")
 LANGOPT(OpenMPUseTLS      , 1, 0, "Use TLS for threadprivates or runtime calls")
 LANGOPT(OpenMPIsDevice    , 1, 0, "Generate code only for OpenMP target device")
 LANGOPT(RenderScript      , 1, 0, "RenderScript")
Index: docs/UsersManual.rst
===================================================================
--- docs/UsersManual.rst
+++ docs/UsersManual.rst
@@ -1988,6 +1988,11 @@
 Use `-fopenmp` to enable OpenMP. Support for OpenMP can be disabled with
 `-fno-openmp`.
 
+Use `-fopenmp-simd` to enable OpenMP simd features only, without linking
+the runtime library; for combined constructs
+(e.g. ``#pragma omp parallel for simd``) the non-simd directives and clauses
+will be ignored. This can be disabled with `-fno-openmp-simd`.
+
 Controlling implementation limits
 ---------------------------------
 
Index: docs/ClangCommandLineReference.rst
===================================================================
--- docs/ClangCommandLineReference.rst
+++ docs/ClangCommandLineReference.rst
@@ -1451,6 +1451,8 @@
 
 .. option:: -fopenmp, -fno-openmp
 
+.. option:: -fopenmp-simd, -fno-openmp-simd
+
 .. option:: -fopenmp-dump-offload-linker-script
 
 .. option:: -fopenmp-use-tls
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to