This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGefae695d52cc: Add -f[no-]loop-versioning option (authored by 
MatsPetersson).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141307/new/

https://reviews.llvm.org/D141307

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Flang.cpp
  clang/lib/Driver/ToolChains/Flang.h
  flang/include/flang/Frontend/CodeGenOptions.def
  flang/include/flang/Tools/CLOptions.inc
  flang/lib/Frontend/CompilerInvocation.cpp
  flang/lib/Frontend/FrontendActions.cpp
  flang/test/Driver/driver-help-hidden.f90
  flang/test/Driver/driver-help.f90
  flang/test/Driver/frontend-forwarding.f90
  flang/test/Driver/version-loops.f90

Index: flang/test/Driver/version-loops.f90
===================================================================
--- /dev/null
+++ flang/test/Driver/version-loops.f90
@@ -0,0 +1,54 @@
+! Test that flang-new forwards the -f{no-,}version-loops-for-stride 
+! options correctly to flang-new -fc1 for different variants of optimisation
+! and explicit flags.
+
+! RUN: %flang -### %s -o %t 2>&1   -O3 \
+! RUN:   | FileCheck %s
+  
+! RUN: %flang -### %s -o %t 2>&1 -O2 \
+! RUN:   | FileCheck %s --check-prefix=CHECK-O2
+
+! RUN: %flang -### %s -o %t 2>&1  -O2 -fversion-loops-for-stride \
+! RUN:   | FileCheck %s --check-prefix=CHECK-O2-with
+  
+! RUN: %flang -### %s -o %t 2>&1  -O4 \
+! RUN:   | FileCheck %s --check-prefix=CHECK-O4
+  
+! RUN: %flang -### %s -o %t 2>&1  -Ofast \
+! RUN:   | FileCheck %s --check-prefix=CHECK-Ofast
+  
+! RUN: %flang -### %s -o %t 2>&1 -Ofast -fno-version-loops-for-stride \
+! RUN:   | FileCheck %s --check-prefix=CHECK-Ofast-no
+
+! RUN: %flang -### %s -o %t 2>&1 -O3 -fno-version-loops-for-stride \
+! RUN:   | FileCheck %s --check-prefix=CHECK-O3-no
+
+! CHECK: "{{.*}}flang-new" "-fc1"
+! CHECK-SAME: "-fversion-loops-for-stride"
+! CHECK-SAME: "-O3"
+
+! CHECK-O2: "{{.*}}flang-new" "-fc1"
+! CHECK-O2-NOT: "-fversion-loops-for-stride"
+! CHECK-O2-SAME: "-O2"  
+
+! CHECK-O2-with: "{{.*}}flang-new" "-fc1"
+! CHECK-O2-with-SAME: "-fversion-loops-for-stride"
+! CHECK-O2-with-SAME: "-O2"  
+  
+! CHECK-O4: "{{.*}}flang-new" "-fc1"
+! CHECK-O4-SAME: "-fversion-loops-for-stride"
+! CHECK-O4-SAME: "-O3"
+
+! CHECK-Ofast: "{{.*}}flang-new" "-fc1"
+! CHECK-Ofast-SAME: "-ffast-math"
+! CHECK-Ofast-SAME: "-fversion-loops-for-stride"
+! CHECK-Ofast-SAME: "-O3"
+
+! CHECK-Ofast-no: "{{.*}}flang-new" "-fc1"
+! CHECK-Ofast-no-SAME: "-ffast-math"
+! CHECK-Ofast-no-NOT: "-fversion-loops-for-stride"
+! CHECK-Ofast-no-SAME: "-O3"
+
+! CHECK-O3-no: "{{.*}}flang-new" "-fc1"
+! CHECK-O3-no-NOT: "-fversion-loops-for-stride"
+! CHECK-O3-no-SAME: "-O3"
Index: flang/test/Driver/frontend-forwarding.f90
===================================================================
--- flang/test/Driver/frontend-forwarding.f90
+++ flang/test/Driver/frontend-forwarding.f90
@@ -15,6 +15,8 @@
 ! RUN:     -fassociative-math \
 ! RUN:     -freciprocal-math \
 ! RUN:     -fpass-plugin=Bye%pluginext \
+! RUN:     -fversion-loops-for-stride \
+! RUN:     -mllvm -print-before-all\
 ! RUN:     -mllvm -print-before-all \
 ! RUN:     -save-temps=obj \
 ! RUN:     -P \
@@ -34,5 +36,6 @@
 ! CHECK: "-freciprocal-math"
 ! CHECK: "-fconvert=little-endian"
 ! CHECK: "-fpass-plugin=Bye
+! CHECK: "-fversion-loops-for-stride"  
 ! CHECK: "-mllvm" "-print-before-all"
 ! CHECK: "-save-temps=obj"
Index: flang/test/Driver/driver-help.f90
===================================================================
--- flang/test/Driver/driver-help.f90
+++ flang/test/Driver/driver-help.f90
@@ -48,6 +48,8 @@
 ! HELP-NEXT: -fno-integrated-as      Disable the integrated assembler
 ! HELP-NEXT: -fno-signed-zeros      Allow optimizations that ignore the sign of floating point zeros
 ! HELP-NEXT: -fno-stack-arrays      Allocate array temporaries on the heap (default)
+! HELP-NEXT: -fno-version-loops-for-stride
+! HELP-NEXT:                        Do not create unit-strided loops (default)
 ! HELP-NEXT: -fopenacc              Enable OpenACC
 ! HELP-NEXT: -fopenmp               Parse OpenMP pragmas and generate parallel code.
 ! HELP-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
@@ -55,6 +57,8 @@
 ! HELP-NEXT: -fstack-arrays         Attempt to allocate array temporaries on the stack, no matter their size
 ! HELP-NEXT: -fsyntax-only          Run the preprocessor, parser and semantic analysis stages
 ! HELP-NEXT: -funderscoring         Appends one trailing underscore to external names
+! HELP-NEXT: -fversion-loops-for-stride
+! HELP-NEXT:                        Create unit-strided versions of loops
 ! HELP-NEXT: -fxor-operator         Enable .XOR. as a synonym of .NEQV.
 ! HELP-NEXT: -gline-tables-only     Emit debug line number tables only
 ! HELP-NEXT: -g                     Generate source-level debug information
@@ -146,6 +150,8 @@
 ! HELP-FC1-NEXT: -fno-reformat          Dump the cooked character stream in -E mode
 ! HELP-FC1-NEXT: -fno-signed-zeros      Allow optimizations that ignore the sign of floating point zeros
 ! HELP-FC1-NEXT: -fno-stack-arrays      Allocate array temporaries on the heap (default)
+! HELP-FC1-NEXT: -fno-version-loops-for-stride
+! HELP-FC1-NEXT:                        Do not create unit-strided loops (default)
 ! HELP-FC1-NEXT: -fopenacc              Enable OpenACC
 ! HELP-FC1-NEXT: -fopenmp-is-device     Generate code only for an OpenMP target device.
 ! HELP-FC1-NEXT: -fopenmp-target-debug  Enable debugging in the OpenMP offloading device RTL
@@ -155,6 +161,8 @@
 ! HELP-FC1-NEXT: -fstack-arrays         Attempt to allocate array temporaries on the stack, no matter their size
 ! HELP-FC1-NEXT: -fsyntax-only          Run the preprocessor, parser and semantic analysis stages
 ! HELP-FC1-NEXT: -funderscoring         Appends one trailing underscore to external names
+! HELP-FC1-NEXT: -fversion-loops-for-stride
+! HELP-FC1-NEXT:                        Create unit-strided versions of loops
 ! HELP-FC1-NEXT: -fxor-operator         Enable .XOR. as a synonym of .NEQV.
 ! HELP-FC1-NEXT: -help                  Display available options
 ! HELP-FC1-NEXT: -init-only             Only execute frontend initialization
Index: flang/test/Driver/driver-help-hidden.f90
===================================================================
--- flang/test/Driver/driver-help-hidden.f90
+++ flang/test/Driver/driver-help-hidden.f90
@@ -52,6 +52,8 @@
 ! CHECK-NEXT: -fno-integrated-as     Disable the integrated assembler
 ! CHECK-NEXT: -fno-signed-zeros      Allow optimizations that ignore the sign of floating point zeros
 ! CHECK-NEXT: -fno-stack-arrays      Allocate array temporaries on the heap (default)
+! CHECK-NEXT: -fno-version-loops-for-stride
+! CHECK-NEXT:                        Do not create unit-strided loops (default)
 ! CHECK-NEXT: -fopenacc              Enable OpenACC
 ! CHECK-NEXT: -fopenmp               Parse OpenMP pragmas and generate parallel code.
 ! CHECK-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
@@ -59,6 +61,8 @@
 ! CHECK-NEXT: -fstack-arrays         Attempt to allocate array temporaries on the stack, no matter their size
 ! CHECK-NEXT: -fsyntax-only          Run the preprocessor, parser and semantic analysis stages
 ! CHECK-NEXT: -funderscoring         Appends one trailing underscore to external names
+! CHECK-NEXT: -fversion-loops-for-stride
+! CHECK-NEXT:                        Create unit-strided versions of loops
 ! CHECK-NEXT: -fxor-operator         Enable .XOR. as a synonym of .NEQV.
 ! CHECK-NEXT: -gline-tables-only     Emit debug line number tables only
 ! CHECK-NEXT: -g                     Generate source-level debug information
Index: flang/lib/Frontend/FrontendActions.cpp
===================================================================
--- flang/lib/Frontend/FrontendActions.cpp
+++ flang/lib/Frontend/FrontendActions.cpp
@@ -665,7 +665,8 @@
 
   // Create the pass pipeline
   fir::createMLIRToLLVMPassPipeline(pm, level, opts.StackArrays,
-                                    opts.Underscoring, opts.getDebugInfo());
+                                    opts.Underscoring, opts.LoopVersioning,
+                                    opts.getDebugInfo());
   (void)mlir::applyPassManagerCLOptions(pm);
 
   // run the pass manager
@@ -704,7 +705,6 @@
       llvmModule->setPIELevel(
           static_cast<llvm::PIELevel::Level>(opts.PICLevel));
   }
-
 }
 
 bool CodeGenAction::setUpTargetMachine() {
Index: flang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- flang/lib/Frontend/CompilerInvocation.cpp
+++ flang/lib/Frontend/CompilerInvocation.cpp
@@ -163,6 +163,10 @@
                    clang::driver::options::OPT_fno_stack_arrays, false)) {
     opts.StackArrays = 1;
   }
+  if (args.hasFlag(clang::driver::options::OPT_floop_versioning,
+                   clang::driver::options::OPT_fno_loop_versioning, false)) {
+    opts.LoopVersioning = 1;
+  }
 
   for (auto *a : args.filtered(clang::driver::options::OPT_fpass_plugin_EQ))
     opts.LLVMPassPlugins.push_back(a->getValue());
Index: flang/include/flang/Tools/CLOptions.inc
===================================================================
--- flang/include/flang/Tools/CLOptions.inc
+++ flang/include/flang/Tools/CLOptions.inc
@@ -186,7 +186,7 @@
 /// \param pm - MLIR pass manager that will hold the pipeline definition
 inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
     llvm::OptimizationLevel optLevel = defaultOptLevel,
-    bool stackArrays = false) {
+    bool stackArrays = false, bool loopVersioning = false) {
   // simplify the IR
   mlir::GreedyRewriteConfig config;
   config.enableRegionSimplification = false;
@@ -283,11 +283,13 @@
 inline void createMLIRToLLVMPassPipeline(mlir::PassManager &pm,
     llvm::OptimizationLevel optLevel = defaultOptLevel,
     bool stackArrays = false, bool underscoring = true,
+    bool loopVersioning = false,
     llvm::codegenoptions::DebugInfoKind debugInfo = NoDebugInfo) {
   fir::createHLFIRToFIRPassPipeline(pm, optLevel);
 
   // Add default optimizer pass pipeline.
-  fir::createDefaultFIROptimizerPassPipeline(pm, optLevel, stackArrays);
+  fir::createDefaultFIROptimizerPassPipeline(
+      pm, optLevel, stackArrays, loopVersioning);
 
   // Add codegen pass pipeline.
   fir::createDefaultFIRCodeGenPassPipeline(
Index: flang/include/flang/Frontend/CodeGenOptions.def
===================================================================
--- flang/include/flang/Frontend/CodeGenOptions.def
+++ flang/include/flang/Frontend/CodeGenOptions.def
@@ -31,6 +31,7 @@
 CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the
                                      ///< compile step.
 CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
+CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
 
 CODEGENOPT(Underscoring, 1, 1)
 ENUM_CODEGENOPT(RelocationModel, llvm::Reloc::Model, 3, llvm::Reloc::PIC_) ///< Name of the relocation model to use.
Index: clang/lib/Driver/ToolChains/Flang.h
===================================================================
--- clang/lib/Driver/ToolChains/Flang.h
+++ clang/lib/Driver/ToolChains/Flang.h
@@ -67,6 +67,14 @@
                          const JobAction &JA, const llvm::opt::ArgList &Args,
                          llvm::opt::ArgStringList &CmdArgs) const;
 
+  /// Extract options for code generation from the driver arguments and add them
+  /// to the command arguments.
+  ///
+  /// \param [in] Args The list of input driver arguments
+  /// \param [out] CmdArgs The list of output command arguments
+  void addCodegenOptions(const llvm::opt::ArgList &Args,
+                         llvm::opt::ArgStringList &CmdArgs) const;
+
   /// Extract other compilation options from the driver arguments and add them
   /// to the command arguments.
   ///
Index: clang/lib/Driver/ToolChains/Flang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Flang.cpp
+++ clang/lib/Driver/ToolChains/Flang.cpp
@@ -52,6 +52,55 @@
                    options::OPT_I, options::OPT_cpp, options::OPT_nocpp});
 }
 
+/// @C shouldLoopVersion
+///
+/// Check if Loop Versioning should be enabled.
+/// We look at the last occurrence of any of the following options:
+///   -Ofast, -O4, -O<number> and -f[no-]version-loops-for-stride.
+/// Loop versioning is disabled if the last option is
+///  -fno-version-loops-for-stride.
+/// Loop versioning is enabled if the last option is one of:
+///  -fversion-loops-for-stride
+///  -Ofast
+///  -O4
+///  -O<number> with a number greater than 2 (e.g. -O3)
+/// For all other cases, loop versioning is disabled.
+///
+/// The gfortran compiler automatically enables the option for -O3 or -Ofast.
+///
+/// @return true if loop-versioning should be enabled, otherwise false.
+static bool shouldLoopVersion(const ArgList &Args) {
+  const Arg *LoopVersioningArg = Args.getLastArg(
+      options::OPT_Ofast, options::OPT_O, options::OPT_O4,
+      options::OPT_floop_versioning, options::OPT_fno_loop_versioning);
+  if (!LoopVersioningArg)
+    return false;
+
+  if (LoopVersioningArg->getOption().matches(options::OPT_fno_loop_versioning))
+    return false;
+
+  if (LoopVersioningArg->getOption().matches(options::OPT_floop_versioning))
+    return true;
+
+  if (LoopVersioningArg->getOption().matches(options::OPT_Ofast) ||
+      LoopVersioningArg->getOption().matches(options::OPT_O4))
+    return true;
+
+  if (LoopVersioningArg->getOption().matches(options::OPT_O)) {
+    StringRef S(LoopVersioningArg->getValue());
+    unsigned OptLevel = 0;
+    // Note: -Os or -Oz would "fail" to parse as an integer here, so we
+    // return false, which is the desired behavior.
+    if (S.getAsInteger(10, OptLevel))
+      return false;
+
+    return OptLevel > 2;
+  }
+
+  llvm_unreachable("We should not end up here");
+  return false;
+}
+
 void Flang::addOtherOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
   Args.AddAllArgs(CmdArgs,
                   {options::OPT_module_dir, options::OPT_fdebug_module_writer,
@@ -60,16 +109,6 @@
                    options::OPT_fconvert_EQ, options::OPT_fpass_plugin_EQ,
                    options::OPT_funderscoring, options::OPT_fno_underscoring});
 
-  Arg *stackArrays =
-      Args.getLastArg(options::OPT_Ofast, options::OPT_fstack_arrays,
-                      options::OPT_fno_stack_arrays);
-  if (stackArrays &&
-      !stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
-    CmdArgs.push_back("-fstack-arrays");
-
-  if (Args.hasArg(options::OPT_flang_experimental_hlfir))
-    CmdArgs.push_back("-flang-experimental-hlfir");
-
   llvm::codegenoptions::DebugInfoKind DebugInfoKind;
   if (Args.hasArg(options::OPT_gN_Group)) {
     Arg *gNArg = Args.getLastArg(options::OPT_gN_Group);
@@ -82,6 +121,21 @@
   addDebugInfoKind(CmdArgs, DebugInfoKind);
 }
 
+void Flang::addCodegenOptions(const ArgList &Args,
+                              ArgStringList &CmdArgs) const {
+  Arg *stackArrays =
+      Args.getLastArg(options::OPT_Ofast, options::OPT_fstack_arrays,
+                      options::OPT_fno_stack_arrays);
+  if (stackArrays && // forward unless the last of the three is the "no" form
+      !stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
+    CmdArgs.push_back("-fstack-arrays");
+
+  if (Args.hasArg(options::OPT_flang_experimental_hlfir))
+    CmdArgs.push_back("-flang-experimental-hlfir");
+  if (shouldLoopVersion(Args)) // decided from the last -O*/-f[no-] option
+    CmdArgs.push_back("-fversion-loops-for-stride");
+}
+
 void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
   // ParsePICArgs parses -fPIC/-fPIE and their variants and returns a tuple of
   // (RelocationModel, PICLevel, IsPIE).
@@ -391,6 +445,9 @@
   // Add target args, features, etc.
   addTargetOptions(Args, CmdArgs);
 
+  // Add Codegen options
+  addCodegenOptions(Args, CmdArgs);
+
   // Add other compile options
   addOtherOptions(Args, CmdArgs);
 
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -5167,7 +5167,9 @@
 defm stack_arrays : BoolOptionWithoutMarshalling<"f", "stack-arrays",
   PosFlag<SetTrue, [], "Attempt to allocate array temporaries on the stack, no matter their size">,
   NegFlag<SetFalse, [], "Allocate array temporaries on the heap (default)">>;
-
+defm loop_versioning : BoolOptionWithoutMarshalling<"f", "version-loops-for-stride",
+  PosFlag<SetTrue, [], "Create unit-strided versions of loops">,
+   NegFlag<SetFalse, [], "Do not create unit-strided loops (default)">>;
 } // let Flags = [FC1Option, FlangOption, FlangOnlyOption]
 
 def J : JoinedOrSeparate<["-"], "J">,
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to