Leporacanthicus updated this revision to Diff 510756.
Leporacanthicus added a comment.

Rebase.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141307/new/

https://reviews.llvm.org/D141307

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Flang.cpp
  flang/include/flang/Frontend/CodeGenOptions.def
  flang/include/flang/Tools/CLOptions.inc
  flang/lib/Frontend/CompilerInvocation.cpp
  flang/lib/Frontend/FrontendActions.cpp
  flang/test/Driver/driver-help-hidden.f90
  flang/test/Driver/driver-help.f90
  flang/test/Driver/frontend-forwarding.f90
  flang/test/Driver/version-loops.f90

Index: flang/test/Driver/version-loops.f90
===================================================================
--- /dev/null
+++ flang/test/Driver/version-loops.f90
@@ -0,0 +1,54 @@
+! Test that flang-new forwards the -f{no-,}version-loops-for-stride 
+! options correctly to flang-new -fc1 for different variants of optimisation
+! and explicit flags.
+
+! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \
+! RUN:   -O3 \
+! RUN:   | FileCheck %s
+  
+! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \
+! RUN:   -O2 \
+! RUN:   | FileCheck %s --check-prefix=CHECK-O2
+
+! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \
+! RUN:   -O2 -fversion-loops-for-stride \
+! RUN:   | FileCheck %s --check-prefix=CHECK-O2-with
+  
+! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \
+! RUN:   -O4 \
+! RUN:   | FileCheck %s --check-prefix=CHECK-O4
+  
+! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \
+! RUN:   -Ofast \
+! RUN:   | FileCheck %s --check-prefix=CHECK-Ofast
+  
+! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \
+! RUN:   -Ofast -fno-version-loops-for-stride \
+! RUN:   | FileCheck %s --check-prefix=CHECK-Ofast-no
+
+! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \
+! RUN:   -O3 -fno-version-loops-for-stride \
+! RUN:   | FileCheck %s --check-prefix=CHECK-O3-no
+  
+! CHECK: "-fversion-loops-for-stride"
+! CHECK: "-O3"
+
+! CHECK-O2-NOT: "-fversion-loops-for-stride"
+! CHECK-O2: "-O2"  
+
+! CHECK-O2-with: "-fversion-loops-for-stride"
+! CHECK-O2-with: "-O2"  
+  
+! CHECK-O4: "-fversion-loops-for-stride"
+! CHECK-O4: "-O3"
+
+! CHECK-Ofast: "-ffast-math"
+! CHECK-Ofast: "-fversion-loops-for-stride"
+! CHECK-Ofast: "-O3"
+
+! CHECK-Ofast-no: "-ffast-math"
+! CHECK-Ofast-no-NOT: "-fversion-loops-for-stride"
+! CHECK-Ofast-no: "-O3"
+
+! CHECK-O3-no-NOT: "-fversion-loops-for-stride"
+! CHECK-O3-no: "-O3"
Index: flang/test/Driver/frontend-forwarding.f90
===================================================================
--- flang/test/Driver/frontend-forwarding.f90
+++ flang/test/Driver/frontend-forwarding.f90
@@ -15,6 +15,8 @@
 ! RUN:     -fassociative-math \
 ! RUN:     -freciprocal-math \
 ! RUN:     -fpass-plugin=Bye%pluginext \
+! RUN:     -fversion-loops-for-stride \
+! RUN:     -mllvm -print-before-all\
 ! RUN:     -mllvm -print-before-all \
 ! RUN:     -save-temps=obj \
 ! RUN:     -P \
@@ -34,5 +36,6 @@
 ! CHECK: "-freciprocal-math"
 ! CHECK: "-fconvert=little-endian"
 ! CHECK: "-fpass-plugin=Bye
+! CHECK: "-fversion-loops-for-stride"  
 ! CHECK: "-mllvm" "-print-before-all"
 ! CHECK: "-save-temps=obj"
Index: flang/test/Driver/driver-help.f90
===================================================================
--- flang/test/Driver/driver-help.f90
+++ flang/test/Driver/driver-help.f90
@@ -48,6 +48,8 @@
 ! HELP-NEXT: -fno-integrated-as      Disable the integrated assembler
 ! HELP-NEXT: -fno-signed-zeros      Allow optimizations that ignore the sign of floating point zeros
 ! HELP-NEXT: -fno-stack-arrays      Allocate array temporaries on the heap (default)
+! HELP-NEXT: -fno-version-loops-for-stride
+! HELP-NEXT:                        Do not create unit-strided loops (default)
 ! HELP-NEXT: -fopenacc              Enable OpenACC
 ! HELP-NEXT: -fopenmp               Parse OpenMP pragmas and generate parallel code.
 ! HELP-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
@@ -55,6 +57,8 @@
 ! HELP-NEXT: -fstack-arrays         Attempt to allocate array temporaries on the stack, no matter their size
 ! HELP-NEXT: -fsyntax-only          Run the preprocessor, parser and semantic analysis stages
 ! HELP-NEXT: -funderscoring         Appends one trailing underscore to external names
+! HELP-NEXT: -fversion-loops-for-stride
+! HELP-NEXT:                        Create unit-strided versions of loops
 ! HELP-NEXT: -fxor-operator         Enable .XOR. as a synonym of .NEQV.
 ! HELP-NEXT: -gline-tables-only     Emit debug line number tables only
 ! HELP-NEXT: -g                     Generate source-level debug information
@@ -143,6 +147,8 @@
 ! HELP-FC1-NEXT: -fno-reformat          Dump the cooked character stream in -E mode
 ! HELP-FC1-NEXT: -fno-signed-zeros      Allow optimizations that ignore the sign of floating point zeros
 ! HELP-FC1-NEXT: -fno-stack-arrays      Allocate array temporaries on the heap (default)
+! HELP-FC1-NEXT: -fno-version-loops-for-stride
+! HELP-FC1-NEXT:                        Do not create unit-strided loops (default)
 ! HELP-FC1-NEXT: -fopenacc              Enable OpenACC
 ! HELP-FC1-NEXT: -fopenmp-is-device     Generate code only for an OpenMP target device.
 ! HELP-FC1-NEXT: -fopenmp               Parse OpenMP pragmas and generate parallel code.
@@ -151,6 +157,8 @@
 ! HELP-FC1-NEXT: -fstack-arrays         Attempt to allocate array temporaries on the stack, no matter their size
 ! HELP-FC1-NEXT: -fsyntax-only          Run the preprocessor, parser and semantic analysis stages
 ! HELP-FC1-NEXT: -funderscoring         Appends one trailing underscore to external names
+! HELP-FC1-NEXT: -fversion-loops-for-stride
+! HELP-FC1-NEXT:                        Create unit-strided versions of loops
 ! HELP-FC1-NEXT: -fxor-operator         Enable .XOR. as a synonym of .NEQV.
 ! HELP-FC1-NEXT: -help                  Display available options
 ! HELP-FC1-NEXT: -init-only             Only execute frontend initialization
Index: flang/test/Driver/driver-help-hidden.f90
===================================================================
--- flang/test/Driver/driver-help-hidden.f90
+++ flang/test/Driver/driver-help-hidden.f90
@@ -52,6 +52,8 @@
 ! CHECK-NEXT: -fno-integrated-as     Disable the integrated assembler
 ! CHECK-NEXT: -fno-signed-zeros      Allow optimizations that ignore the sign of floating point zeros
 ! CHECK-NEXT: -fno-stack-arrays      Allocate array temporaries on the heap (default)
+! CHECK-NEXT: -fno-version-loops-for-stride
+! CHECK-NEXT:                        Do not create unit-strided loops (default)
 ! CHECK-NEXT: -fopenacc              Enable OpenACC
 ! CHECK-NEXT: -fopenmp               Parse OpenMP pragmas and generate parallel code.
 ! CHECK-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
@@ -59,6 +61,8 @@
 ! CHECK-NEXT: -fstack-arrays         Attempt to allocate array temporaries on the stack, no matter their size
 ! CHECK-NEXT: -fsyntax-only          Run the preprocessor, parser and semantic analysis stages
 ! CHECK-NEXT: -funderscoring         Appends one trailing underscore to external names
+! CHECK-NEXT: -fversion-loops-for-stride
+! CHECK-NEXT:                        Create unit-strided versions of loops
 ! CHECK-NEXT: -fxor-operator         Enable .XOR. as a synonym of .NEQV.
 ! CHECK-NEXT: -gline-tables-only     Emit debug line number tables only
 ! CHECK-NEXT: -g                     Generate source-level debug information
Index: flang/lib/Frontend/FrontendActions.cpp
===================================================================
--- flang/lib/Frontend/FrontendActions.cpp
+++ flang/lib/Frontend/FrontendActions.cpp
@@ -665,7 +665,8 @@
 
   // Create the pass pipeline
   fir::createMLIRToLLVMPassPipeline(pm, level, opts.StackArrays,
-                                    opts.Underscoring, opts.getDebugInfo());
+                                    opts.Underscoring, opts.LoopVersioning,
+                                    opts.getDebugInfo());
   (void)mlir::applyPassManagerCLOptions(pm);
 
   // run the pass manager
@@ -704,7 +705,6 @@
       llvmModule->setPIELevel(
           static_cast<llvm::PIELevel::Level>(opts.PICLevel));
   }
-
 }
 
 bool CodeGenAction::setUpTargetMachine() {
Index: flang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- flang/lib/Frontend/CompilerInvocation.cpp
+++ flang/lib/Frontend/CompilerInvocation.cpp
@@ -163,6 +163,10 @@
                    clang::driver::options::OPT_fno_stack_arrays, false)) {
     opts.StackArrays = 1;
   }
+  if (args.hasFlag(clang::driver::options::OPT_floop_versioning,
+                   clang::driver::options::OPT_fno_loop_versioning, false)) {
+    opts.LoopVersioning = 1;
+  }
 
   for (auto *a : args.filtered(clang::driver::options::OPT_fpass_plugin_EQ))
     opts.LLVMPassPlugins.push_back(a->getValue());
Index: flang/include/flang/Tools/CLOptions.inc
===================================================================
--- flang/include/flang/Tools/CLOptions.inc
+++ flang/include/flang/Tools/CLOptions.inc
@@ -186,7 +186,7 @@
 /// \param pm - MLIR pass manager that will hold the pipeline definition
 inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
     llvm::OptimizationLevel optLevel = defaultOptLevel,
-    bool stackArrays = false) {
+    bool stackArrays = false, bool loopVersioning = false) {
   // simplify the IR
   mlir::GreedyRewriteConfig config;
   config.enableRegionSimplification = false;
@@ -283,11 +283,13 @@
 inline void createMLIRToLLVMPassPipeline(mlir::PassManager &pm,
     llvm::OptimizationLevel optLevel = defaultOptLevel,
     bool stackArrays = false, bool underscoring = true,
+    bool loopVersioning = false,
     llvm::codegenoptions::DebugInfoKind debugInfo = NoDebugInfo) {
   fir::createHLFIRToFIRPassPipeline(pm, optLevel);
 
   // Add default optimizer pass pipeline.
-  fir::createDefaultFIROptimizerPassPipeline(pm, optLevel, stackArrays);
+  fir::createDefaultFIROptimizerPassPipeline(
+      pm, optLevel, stackArrays, loopVersioning);
 
   // Add codegen pass pipeline.
   fir::createDefaultFIRCodeGenPassPipeline(
Index: flang/include/flang/Frontend/CodeGenOptions.def
===================================================================
--- flang/include/flang/Frontend/CodeGenOptions.def
+++ flang/include/flang/Frontend/CodeGenOptions.def
@@ -31,6 +31,7 @@
 CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the
                                      ///< compile step.
 CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
+CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
 
 CODEGENOPT(Underscoring, 1, 1)
 ENUM_CODEGENOPT(RelocationModel, llvm::Reloc::Model, 3, llvm::Reloc::PIC_) ///< Name of the relocation model to use.
Index: clang/lib/Driver/ToolChains/Flang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Flang.cpp
+++ clang/lib/Driver/ToolChains/Flang.cpp
@@ -52,6 +52,32 @@
                    options::OPT_I, options::OPT_cpp, options::OPT_nocpp});
 }
 
+static bool shouldLoopVersion(const ArgList &Args) {
+  if (Arg *A = Args.getLastArg(options::OPT_Ofast, options::OPT_O,
+                               options::OPT_O4, options::OPT_floop_versioning,
+                               options::OPT_fno_loop_versioning)) {
+    if (A->getOption().matches(options::OPT_fno_loop_versioning))
+      return false;
+
+    if (A->getOption().matches(options::OPT_floop_versioning))
+      return true;
+
+    if (A->getOption().matches(options::OPT_Ofast) ||
+        A->getOption().matches(options::OPT_O4))
+      return true;
+
+    if (A->getOption().matches(options::OPT_O)) {
+      StringRef S(A->getValue());
+      unsigned OptLevel = 0;
+      if (S.getAsInteger(10, OptLevel))
+        return false;
+
+      return OptLevel > 2;
+    }
+  }
+  return false;
+}
+
 void Flang::addOtherOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
   Args.AddAllArgs(CmdArgs,
                   {options::OPT_module_dir, options::OPT_fdebug_module_writer,
@@ -80,6 +106,8 @@
     DebugInfoKind = llvm::codegenoptions::NoDebugInfo;
   }
   addDebugInfoKind(CmdArgs, DebugInfoKind);
+  if (shouldLoopVersion(Args))
+    CmdArgs.push_back("-fversion-loops-for-stride");
 }
 
 void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -5154,7 +5154,9 @@
 defm stack_arrays : BoolOptionWithoutMarshalling<"f", "stack-arrays",
   PosFlag<SetTrue, [], "Attempt to allocate array temporaries on the stack, no matter their size">,
   NegFlag<SetFalse, [], "Allocate array temporaries on the heap (default)">>;
-
+defm loop_versioning : BoolOptionWithoutMarshalling<"f", "version-loops-for-stride",
+  PosFlag<SetTrue, [], "Create unit-strided versions of loops">,
+   NegFlag<SetFalse, [], "Do not create unit-strided loops (default)">>;
 } // let Flags = [FC1Option, FlangOption, FlangOnlyOption]
 
 def J : JoinedOrSeparate<["-"], "J">,
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to