[PATCH] D77989: Allow disabling of vectorization using internal options

Teresa Johnson via Phabricator via cfe-commits Tue, 14 Apr 2020 17:27:10 -0700

tejohnson updated this revision to Diff 257562.
tejohnson added a comment.

Address comment



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77989/new/

https://reviews.llvm.org/D77989

Files:
  clang/test/CodeGen/thinlto-loop-vectorize-pm.c
  clang/test/CodeGen/thinlto-slp-vectorize-pm.c
  llvm/include/llvm/Passes/PassBuilder.h
  llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
  llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
  llvm/lib/Passes/PassBuilder.cpp
  llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
  llvm/test/Transforms/SLPVectorizer/X86/opt.ll
  llvm/tools/opt/opt.cpp

Index: llvm/tools/opt/opt.cpp
===================================================================
--- llvm/tools/opt/opt.cpp
+++ llvm/tools/opt/opt.cpp
@@ -180,11 +180,6 @@
                      cl::desc("Disable loop unrolling in all relevant passes"),
                      cl::init(false));
 
-static cl::opt<bool>
-DisableSLPVectorization("disable-slp-vectorization",
-                        cl::desc("Disable the slp vectorization pass"),
-                        cl::init(false));
-
 static cl::opt<bool> EmitSummaryIndex("module-summary",
                                       cl::desc("Emit module summary index"),
                                       cl::init(false));
@@ -406,18 +401,9 @@
   Builder.DisableUnrollLoops = (DisableLoopUnrolling.getNumOccurrences() > 0) ?
                                DisableLoopUnrolling : OptLevel == 0;
 
-  // Check if vectorization is explicitly disabled via -vectorize-loops=false.
-  // The flag enables vectorization in the LoopVectorize pass, it is on by
-  // default, and if it was disabled, leave it disabled here.
-  // Another flag that exists: -loop-vectorize, controls adding the pass to the
-  // pass manager. If set, the pass is added, and there is no additional check
-  // here for it.
-  if (Builder.LoopVectorize)
-    Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2;
-
-  // When #pragma vectorize is on for SLP, do the same as above
-  Builder.SLPVectorize =
-      DisableSLPVectorization ? false : OptLevel > 1 && SizeLevel < 2;
+  Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2;
+
+  Builder.SLPVectorize = OptLevel > 1 && SizeLevel < 2;
 
   if (TM)
     TM->adjustPassManager(Builder);
Index: llvm/test/Transforms/SLPVectorizer/X86/opt.ll
===================================================================
--- llvm/test/Transforms/SLPVectorizer/X86/opt.ll
+++ llvm/test/Transforms/SLPVectorizer/X86/opt.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -O3 -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=SLP
-; RUN: opt < %s -O3 -disable-slp-vectorization -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSLP
+; RUN: opt < %s -O3 -vectorize-slp=false -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSLP
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -108,9 +108,8 @@
 
 STATISTIC(NumVectorInstructions, "Number of vector instructions generated");
 
-cl::opt<bool>
-    llvm::RunSLPVectorization("vectorize-slp", cl::init(false), cl::Hidden,
-                              cl::desc("Run the SLP vectorization passes"));
+cl::opt<bool> RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden,
+                                  cl::desc("Run the SLP vectorization passes"));
 
 static cl::opt<int>
     SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
@@ -5645,6 +5644,8 @@
                                 LoopInfo *LI_, DominatorTree *DT_,
                                 AssumptionCache *AC_, DemandedBits *DB_,
                                 OptimizationRemarkEmitter *ORE_) {
+  if (!RunSLPVectorization)
+    return false;
   SE = SE_;
   TTI = TTI_;
   TLI = TLI_;
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1590,9 +1590,8 @@
 
   explicit LoopVectorize(bool InterleaveOnlyWhenForced = false,
                          bool VectorizeOnlyWhenForced = false)
-      : FunctionPass(ID) {
-    Impl.InterleaveOnlyWhenForced = InterleaveOnlyWhenForced;
-    Impl.VectorizeOnlyWhenForced = VectorizeOnlyWhenForced;
+      : FunctionPass(ID),
+        Impl({InterleaveOnlyWhenForced, VectorizeOnlyWhenForced}) {
     initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
   }
 
@@ -7615,6 +7614,12 @@
   return true;
 }
 
+LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts)
+    : InterleaveOnlyWhenForced(Opts.InterleaveOnlyWhenForced ||
+                               !EnableLoopInterleaving),
+      VectorizeOnlyWhenForced(Opts.VectorizeOnlyWhenForced ||
+                              !EnableLoopVectorization) {}
+
 bool LoopVectorizePass::processLoop(Loop *L) {
   assert((EnableVPlanNativePath || L->empty()) &&
          "VPlan-native path is not enabled. Only process inner loops.");
Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -159,9 +159,9 @@
     LibraryInfo = nullptr;
     Inliner = nullptr;
     DisableUnrollLoops = false;
-    SLPVectorize = RunSLPVectorization;
-    LoopVectorize = EnableLoopVectorization;
-    LoopsInterleaved = EnableLoopInterleaving;
+    SLPVectorize = false;
+    LoopVectorize = true;
+    LoopsInterleaved = true;
     RerollLoops = RunLoopRerolling;
     NewGVN = RunNewGVN;
     LicmMssaOptCap = SetLicmMssaOptCap;
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -242,9 +242,9 @@
     cl::desc("Enable call graph profile pass for the new PM (default = on)"));
 
 PipelineTuningOptions::PipelineTuningOptions() {
-  LoopInterleaving = EnableLoopInterleaving;
-  LoopVectorization = EnableLoopVectorization;
-  SLPVectorization = RunSLPVectorization;
+  LoopInterleaving = true;
+  LoopVectorization = true;
+  SLPVectorization = false;
   LoopUnrolling = true;
   ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
   Coroutines = false;
Index: llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
===================================================================
--- llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -55,8 +55,6 @@
 
 } // end namespace slpvectorizer
 
-extern cl::opt<bool> RunSLPVectorization;
-
 struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
   using StoreList = SmallVector<StoreInst *, 8>;
   using StoreListMap = MapVector<Value *, StoreList>;
Index: llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
===================================================================
--- llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -118,6 +118,7 @@
 
 /// The LoopVectorize Pass.
 struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
+private:
   /// If false, consider all loops for interleaving.
   /// If true, only loops that explicitly request interleaving are considered.
   bool InterleaveOnlyWhenForced;
@@ -126,9 +127,8 @@
   /// If true, only loops that explicitly request vectorization are considered.
   bool VectorizeOnlyWhenForced;
 
-  LoopVectorizePass(LoopVectorizeOptions Opts = {})
-      : InterleaveOnlyWhenForced(Opts.InterleaveOnlyWhenForced),
-        VectorizeOnlyWhenForced(Opts.VectorizeOnlyWhenForced) {}
+public:
+  LoopVectorizePass(LoopVectorizeOptions Opts = {});
 
   ScalarEvolution *SE;
   LoopInfo *LI;
Index: llvm/include/llvm/Passes/PassBuilder.h
===================================================================
--- llvm/include/llvm/Passes/PassBuilder.h
+++ llvm/include/llvm/Passes/PassBuilder.h
@@ -73,16 +73,15 @@
   /// can be set in the PassBuilder when using a LLVM as a library.
   PipelineTuningOptions();
 
-  /// Tuning option to set loop interleaving on/off. Its default value is that
-  /// of the flag: `-interleave-loops`.
+  /// Tuning option to set loop interleaving on/off, set based on opt level.
   bool LoopInterleaving;
 
-  /// Tuning option to enable/disable loop vectorization. Its default value is
-  /// that of the flag: `-vectorize-loops`.
+  /// Tuning option to enable/disable loop vectorization, set based on opt
+  /// level.
   bool LoopVectorization;
 
-  /// Tuning option to enable/disable slp loop vectorization. Its default value
-  /// is that of the flag: `vectorize-slp`.
+  /// Tuning option to enable/disable slp loop vectorization, set based on opt
+  /// level.
   bool SLPVectorization;
 
   /// Tuning option to enable/disable loop unrolling. Its default value is true.
Index: clang/test/CodeGen/thinlto-slp-vectorize-pm.c
===================================================================
--- clang/test/CodeGen/thinlto-slp-vectorize-pm.c
+++ clang/test/CodeGen/thinlto-slp-vectorize-pm.c
@@ -1,50 +1,27 @@
 // REQUIRES: x86-registered-target
-// RUN: %clang_cc1 -o %t.o -flto=thin -fexperimental-new-pass-manager -triple x86_64-unknown-linux-gnu -emit-llvm-bc %s
+// RUN: %clang_cc1 -o %t.o -O2 -flto=thin -fexperimental-new-pass-manager -triple x86_64-unknown-linux-gnu -emit-llvm-bc %s
 // RUN: llvm-lto -thinlto -o %t %t.o
 
 // Test to ensure the slp vectorize codegen option is passed down to the
 // ThinLTO backend. -vectorize-slp is a cc1 option and will be added
-// automatically when O2/O3/Os/Oz is available for clang. Once -vectorize-slp
-// is enabled, "-mllvm -vectorize-slp=false" won't disable slp vectorization
-// currently. "-mllvm -vectorize-slp=false" is added here in the test to
-// ensure the slp vectorization is executed because the -vectorize-slp cc1
-// flag is passed down, not because "-mllvm -vectorize-slp" is enabled
-// by default.
+// automatically when O2/O3/Os/Oz is available for clang. Also check that
+// "-mllvm -vectorize-slp=false" will disable slp vectorization, overriding
+// the cc1 option.
 //
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-slp -mllvm -vectorize-slp=false -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-SLP
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-slp -mllvm -vectorize-slp=false -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-SLP
-// O2-SLP: Running pass: SLPVectorizerPass
-// O0-SLP-NOT: Running pass: SLPVectorizerPass
-
-// Test to ensure the loop vectorize codegen option is passed down to the
-// ThinLTO backend. -vectorize-loops is a cc1 option and will be added
-// automatically when O2/O3/Os is available for clang. Once -vectorize-loops is
-// enabled, "-mllvm -vectorize-loops=false" won't disable loop vectorization
-// currently. "-mllvm -vectorize-loops=false" is added here in the test to
-// ensure the loop vectorization is executed because the -vectorize-loops cc1
-// flag is passed down, not because "-mllvm -vectorize-loops" is enabled
-// by default.
-//
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-LPV
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-LPV
-// O2-LPV: = !{!"llvm.loop.isvectorized", i32 1}
-// O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
-
-// Test to ensure the loop interleave codegen option is passed down to the
-// ThinLTO backend. The internal loop interleave codegen option will be
-// enabled automatically when O2/O3 is available for clang. Once the loop
-// interleave option is enabled, "-mllvm -interleave-loops=false" won't disable
-// the interleave. currently. "-mllvm -interleave-loops=false" is added here
-// in the test to ensure the loop interleave is executed because the interleave
-// codegen flag is passed down, not because "-mllvm -interleave-loops" is
-// enabled by default.
+// Check both the new and old PMs.
 //
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -interleave-loops=false -mllvm -force-vector-width=1 -mllvm -force-vector-interleave=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-InterLeave
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -interleave-loops=false -mllvm -force-vector-width=1 -mllvm -force-vector-interleave=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-InterLeave
-// O2-InterLeave: = !{!"llvm.loop.isvectorized", i32 1}
-// O0-InterLeave-NOT: = !{!"llvm.loop.isvectorized", i32 1}
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O2 -vectorize-slp -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=SLP
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O2 -vectorize-slp -mllvm -vectorize-slp=false -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=NOSLP
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O2 -vectorize-slp -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s --check-prefix=SLP
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O2 -vectorize-slp -mllvm -vectorize-slp=false -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s --check-prefix=NOSLP
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -vectorize-slp -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=NOSLP
+// SLP: extractelement
+// NOSLP-NOT: extractelement
 
-void foo(double *a) {
-  for (int i = 0; i < 1000; i++)
-    a[i] = 10;
+int foo(double *A, int n, int m) {
+  double sum = 0, v1 = 2, v0 = 3;
+  for (int i=0; i < n; ++i)
+    sum += 7*A[i*2] + 7*A[i*2+1];
+  return sum;
 }
+
Index: clang/test/CodeGen/thinlto-loop-vectorize-pm.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/thinlto-loop-vectorize-pm.c
@@ -0,0 +1,44 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -o %t.o -O2 -flto=thin -fexperimental-new-pass-manager -triple x86_64-unknown-linux-gnu -emit-llvm-bc %s
+// RUN: llvm-lto -thinlto -o %t %t.o
+
+// Test to ensure the loop vectorize codegen option is passed down to the
+// ThinLTO backend. -vectorize-loops is a cc1 option and will be added
+// automatically when O2/O3/Os is available for clang. Also check that
+// "-mllvm -vectorize-loops=false" will disable loop vectorization, overriding
+// the cc1 option.
+//
+// Check both the new and old PMs.
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-LPV
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s --check-prefix=O2-LPV
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-NOLPV
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s --check-prefix=O2-NOLPV
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-LPV
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s --check-prefix=O0-LPV
+// O2-LPV: = !{!"llvm.loop.isvectorized", i32 1}
+// O2-NOLPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
+// O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
+
+// Test to ensure the loop interleave codegen option is passed down to the
+// ThinLTO backend. The internal loop interleave codegen option will be
+// enabled automatically when O2/O3 is available for clang. Also check that
+// "-mllvm -interleave-loops=false" will disable the interleaving, overriding
+// the cc1 option.
+//
+// Check both the new and old PMs.
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -force-vector-width=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-InterLeave
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -interleave-loops=false -mllvm -force-vector-width=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-NoInterLeave
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -force-vector-width=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-InterLeave
+// O2-InterLeave-COUNT-2: store <2 x double>
+// O2-InterLeave: = !{!"llvm.loop.isvectorized", i32 1}
+// O2-NoInterLeave-COUNT-1: store <2 x double>
+// O2-NoInterLeave-NOT: store <2 x double>
+// O2-NoInterLeave: = !{!"llvm.loop.isvectorized", i32 1}
+// O0-InterLeave-NOT: = !{!"llvm.loop.isvectorized", i32 1}
+
+void foo(double *a) {
+  for (int i = 0; i < 1000; i++)
+    a[i] = 10;
+}

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D77989: Allow disabling of vectorization using internal options

Reply via email to