This revision was automatically updated to reflect the committed changes.
Closed by commit rG00769d69fbaa: [flang] Add -fppc-native-vector-element-order option to control the element… (authored by kkwli0).
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155852/new/

https://reviews.llvm.org/D155852

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Flang.cpp
  flang/include/flang/Lower/CustomIntrinsicCall.h
  flang/include/flang/Lower/LoweringOptions.def
  flang/include/flang/Optimizer/Builder/IntrinsicCall.h
  flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
  flang/lib/Frontend/CompilerInvocation.cpp
  flang/lib/Lower/ConvertExpr.cpp
  flang/lib/Lower/CustomIntrinsicCall.cpp
  flang/lib/Optimizer/Builder/IntrinsicCall.cpp
  flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
  flang/test/Driver/driver-help-hidden.f90
  flang/test/Driver/driver-help.f90
  flang/test/Driver/frontend-forwarding.f90
  flang/test/Lower/PowerPC/ppc-vec_cvf-elem-order.f90
  flang/tools/bbc/bbc.cpp

Index: flang/tools/bbc/bbc.cpp
===================================================================
--- flang/tools/bbc/bbc.cpp
+++ flang/tools/bbc/bbc.cpp
@@ -186,6 +186,11 @@
     llvm::cl::desc("enable polymorphic type lowering (experimental)"),
     llvm::cl::init(false));
 
+static llvm::cl::opt<bool> enableNoPPCNativeVecElemOrder(
+    "fno-ppc-native-vector-element-order",
+    llvm::cl::desc("no PowerPC native vector element order."),
+    llvm::cl::init(false));
+
 static llvm::cl::opt<bool> useHLFIR("hlfir",
                                     llvm::cl::desc("Lower to high level FIR"),
                                     llvm::cl::init(false));
@@ -289,6 +294,7 @@
   // Use default lowering options for bbc.
   Fortran::lower::LoweringOptions loweringOptions{};
   loweringOptions.setPolymorphicTypeImpl(enablePolymorphic);
+  loweringOptions.setNoPPCNativeVecElemOrder(enableNoPPCNativeVecElemOrder);
   loweringOptions.setLowerToHighLevelFIR(useHLFIR || emitHLFIR);
   auto burnside = Fortran::lower::LoweringBridge::create(
       ctx, semanticsContext, defKinds, semanticsContext.intrinsics(),
Index: flang/test/Lower/PowerPC/ppc-vec_cvf-elem-order.f90
===================================================================
--- /dev/null
+++ flang/test/Lower/PowerPC/ppc-vec_cvf-elem-order.f90
@@ -0,0 +1,37 @@
+! RUN: bbc -emit-fir %s -fno-ppc-native-vector-element-order=true -o - | FileCheck --check-prefixes="FIR" %s
+! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="LLVMIR" %s
+! REQUIRES: target=powerpc{{.*}}
+
+! CHECK-LABEL: vec_cvf_test_r4r8
+subroutine vec_cvf_test_r4r8(arg1)
+  vector(real(8)), intent(in) :: arg1
+  vector(real(4)) :: r
+  r = vec_cvf(arg1)
+
+! FIR: %[[arg:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
+! FIR: %[[carg:.*]] = fir.convert %[[arg]] : (!fir.vector<2:f64>) -> vector<2xf64>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.xvcvdpsp(%[[carg]]) fastmath<contract> : (vector<2xf64>) -> !fir.vector<4:f32>
+! FIR: %[[ccall:.*]] = fir.convert %[[call]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! FIR: %[[r:.*]] = fir.convert %[[ccall]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[call:.*]] = call contract <4 x float> @llvm.ppc.vsx.xvcvdpsp(<2 x double> %[[arg]])
+! LLVMIR: store <4 x float> %[[call]], ptr %{{.*}}, align 16
+end subroutine vec_cvf_test_r4r8
+
+! CHECK-LABEL: vec_cvf_test_r8r4
+subroutine vec_cvf_test_r8r4(arg1)
+  vector(real(4)), intent(in) :: arg1
+  vector(real(8)) :: r
+  r = vec_cvf(arg1)
+
+! FIR: %[[arg:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! FIR: %[[carg:.*]] = fir.convert %[[arg]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.xvcvspdp(%[[carg]]) fastmath<contract> : (vector<4xf32>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[call]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[arg:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = call contract <2 x double> @llvm.ppc.vsx.xvcvspdp(<4 x float> %[[arg]])
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+end subroutine vec_cvf_test_r8r4
Index: flang/test/Driver/frontend-forwarding.f90
===================================================================
--- flang/test/Driver/frontend-forwarding.f90
+++ flang/test/Driver/frontend-forwarding.f90
@@ -18,6 +18,8 @@
 ! RUN:     -fversion-loops-for-stride \
 ! RUN:     -flang-experimental-polymorphism \
 ! RUN:     -flang-experimental-hlfir \
+! RUN:     -fno-ppc-native-vector-element-order \
+! RUN:     -fppc-native-vector-element-order \
 ! RUN:     -mllvm -print-before-all \
 ! RUN:     -save-temps=obj \
 ! RUN:     -P \
@@ -40,5 +42,7 @@
 ! CHECK: "-fversion-loops-for-stride"
 ! CHECK: "-flang-experimental-polymorphism"
 ! CHECK: "-flang-experimental-hlfir"
+! CHECK: "-fno-ppc-native-vector-element-order"
+! CHECK: "-fppc-native-vector-element-order"
 ! CHECK: "-mllvm" "-print-before-all"
 ! CHECK: "-save-temps=obj"
Index: flang/test/Driver/driver-help.f90
===================================================================
--- flang/test/Driver/driver-help.f90
+++ flang/test/Driver/driver-help.f90
@@ -46,6 +46,8 @@
 ! HELP-NEXT: -fno-automatic         Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE
 ! HELP-NEXT: -fno-color-diagnostics  Disable colors in diagnostics
 ! HELP-NEXT: -fno-integrated-as      Disable the integrated assembler
+! HELP-NEXT: -fno-ppc-native-vector-element-order
+! HELP-NEXT:                        Specifies PowerPC non-native vector element order
 ! HELP-NEXT: -fno-signed-zeros      Allow optimizations that ignore the sign of floating point zeros
 ! HELP-NEXT: -fno-stack-arrays      Allocate array temporaries on the heap (default)
 ! HELP-NEXT: -fno-version-loops-for-stride
@@ -59,6 +61,8 @@
 ! HELP-NEXT: -foptimization-record-passes=<regex>
 ! HELP-NEXT:                        Only include passes which match a specified regular expression in the generated optimization record (by default, include all passes)
 ! HELP-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
+! HELP-NEXT: -fppc-native-vector-element-order
+! HELP-NEXT:                        Specifies PowerPC native vector element order
 ! HELP-NEXT: -freciprocal-math      Allow division operations to be reassociated
 ! HELP-NEXT: -fsave-optimization-record=<format>
 ! HELP-NEXT:                        Generate an optimization record file in a specific format
@@ -158,6 +162,8 @@
 ! HELP-FC1-NEXT:                        Do not use the analyzed objects when unparsing
 ! HELP-FC1-NEXT: -fno-automatic         Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE
 ! HELP-FC1-NEXT: -fno-debug-pass-manager Disables debug printing for the new pass manager
+! HELP-FC1-NEXT: -fno-ppc-native-vector-element-order
+! HELP-FC1-NEXT:                        Specifies PowerPC non-native vector element order
 ! HELP-FC1-NEXT: -fno-reformat          Dump the cooked character stream in -E mode
 ! HELP-FC1-NEXT: -fno-signed-zeros      Allow optimizations that ignore the sign of floating point zeros
 ! HELP-FC1-NEXT: -fno-stack-arrays      Allocate array temporaries on the heap (default)
@@ -173,6 +179,8 @@
 ! HELP-FC1-NEXT:                        Set OpenMP version (e.g. 45 for OpenMP 4.5, 50 for OpenMP 5.0). Default value is 50 for Clang and 11 for Flang
 ! HELP-FC1-NEXT: -fopenmp               Parse OpenMP pragmas and generate parallel code.
 ! HELP-FC1-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
+! HELP-FC1-NEXT: -fppc-native-vector-element-order
+! HELP-FC1-NEXT:                        Specifies PowerPC native vector element order
 ! HELP-FC1-NEXT: -freciprocal-math      Allow division operations to be reassociated
 ! HELP-FC1-NEXT: -fstack-arrays         Attempt to allocate array temporaries on the stack, no matter their size
 ! HELP-FC1-NEXT: -fsyntax-only          Run the preprocessor, parser and semantic analysis stages
Index: flang/test/Driver/driver-help-hidden.f90
===================================================================
--- flang/test/Driver/driver-help-hidden.f90
+++ flang/test/Driver/driver-help-hidden.f90
@@ -50,6 +50,8 @@
 ! CHECK-NEXT: -fno-automatic         Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE
 ! CHECK-NEXT: -fno-color-diagnostics  Disable colors in diagnostics
 ! CHECK-NEXT: -fno-integrated-as     Disable the integrated assembler
+! CHECK-NEXT: -fno-ppc-native-vector-element-order
+! CHECK-NEXT:                        Specifies PowerPC non-native vector element order
 ! CHECK-NEXT: -fno-signed-zeros      Allow optimizations that ignore the sign of floating point zeros
 ! CHECK-NEXT: -fno-stack-arrays      Allocate array temporaries on the heap (default)
 ! CHECK-NEXT: -fno-version-loops-for-stride
@@ -63,6 +65,8 @@
 ! CHECK-NEXT: -foptimization-record-passes=<regex>
 ! CHECK-NEXT:                        Only include passes which match a specified regular expression in the generated optimization record (by default, include all passes)
 ! CHECK-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
+! CHECK-NEXT: -fppc-native-vector-element-order
+! CHECK-NEXT:                        Specifies PowerPC native vector element order
 ! CHECK-NEXT: -freciprocal-math      Allow division operations to be reassociated
 ! CHECK-NEXT: -fsave-optimization-record=<format>
 ! CHECK-NEXT:                        Generate an optimization record file in a specific format
Index: flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
===================================================================
--- flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
+++ flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
@@ -360,6 +360,20 @@
   return ppcMathOps.equal_range(name);
 }
 
+// Helper functions for vector element ordering.
+bool PPCIntrinsicLibrary::isBEVecElemOrderOnLE() {
+  return (Fortran::evaluate::isHostLittleEndian &&
+          converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
+}
+bool PPCIntrinsicLibrary::isNativeVecElemOrderOnLE() {
+  return (Fortran::evaluate::isHostLittleEndian &&
+          !converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
+}
+bool PPCIntrinsicLibrary::changeVecElemOrder() {
+  return (Fortran::evaluate::isHostLittleEndian !=
+          converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
+}
+
 static mlir::FunctionType genMmaVpFuncType(mlir::MLIRContext *context,
                                            int quadCnt, int pairCnt, int vecCnt,
                                            int intCnt = 0,
@@ -1014,8 +1028,8 @@
 
     mlir::Value newArgs[]{vArg1};
     if (vecTyInfo.isFloat32()) {
-      // TODO: Handle element ordering
-      newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
+      if (changeVecElemOrder())
+        newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
 
       const llvm::StringRef fname{"llvm.ppc.vsx.xvcvspdp"};
       auto ftype{
@@ -1036,8 +1050,8 @@
       auto mvf32Ty{mlir::VectorType::get(4, f32type)};
       newArgs[0] = builder.createConvert(loc, mvf32Ty, newArgs[0]);
 
-      // TODO: Handle element ordering
-      newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
+      if (changeVecElemOrder())
+        newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
 
       return builder.createConvert(loc, fvf32Ty, newArgs[0]);
     }
Index: flang/lib/Optimizer/Builder/IntrinsicCall.cpp
===================================================================
--- flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -5770,9 +5770,10 @@
 std::pair<fir::ExtendedValue, bool>
 genIntrinsicCall(fir::FirOpBuilder &builder, mlir::Location loc,
                  llvm::StringRef name, std::optional<mlir::Type> resultType,
-                 llvm::ArrayRef<fir::ExtendedValue> args) {
-  return IntrinsicLibrary{builder, loc}.genIntrinsicCall(name, resultType,
-                                                         args);
+                 llvm::ArrayRef<fir::ExtendedValue> args,
+                 Fortran::lower::AbstractConverter *converter) {
+  return IntrinsicLibrary{builder, loc, converter}.genIntrinsicCall(
+      name, resultType, args);
 }
 
 mlir::Value genMax(fir::FirOpBuilder &builder, mlir::Location loc,
Index: flang/lib/Lower/CustomIntrinsicCall.cpp
===================================================================
--- flang/lib/Lower/CustomIntrinsicCall.cpp
+++ flang/lib/Lower/CustomIntrinsicCall.cpp
@@ -98,9 +98,10 @@
                                  llvm::StringRef name,
                                  std::optional<mlir::Type> resultType,
                                  llvm::ArrayRef<fir::ExtendedValue> args,
-                                 Fortran::lower::StatementContext &stmtCtx) {
+                                 Fortran::lower::StatementContext &stmtCtx,
+                                 Fortran::lower::AbstractConverter *converter) {
   auto [result, mustBeFreed] =
-      fir::genIntrinsicCall(builder, loc, name, resultType, args);
+      fir::genIntrinsicCall(builder, loc, name, resultType, args, converter);
   if (mustBeFreed) {
     mlir::Value addr = fir::getBase(result);
     if (auto *box = result.getBoxOf<fir::BoxValue>())
Index: flang/lib/Lower/ConvertExpr.cpp
===================================================================
--- flang/lib/Lower/ConvertExpr.cpp
+++ flang/lib/Lower/ConvertExpr.cpp
@@ -1928,7 +1928,7 @@
     }
     // Let the intrinsic library lower the intrinsic procedure call
     return Fortran::lower::genIntrinsicCall(builder, getLoc(), name, resultType,
-                                            operands, stmtCtx);
+                                            operands, stmtCtx, &converter);
   }
 
   /// helper to detect statement functions
Index: flang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- flang/lib/Frontend/CompilerInvocation.cpp
+++ flang/lib/Frontend/CompilerInvocation.cpp
@@ -954,6 +954,11 @@
     res.loweringOpts.setPolymorphicTypeImpl(true);
   }
 
+  // -fno-ppc-native-vector-element-order
+  if (args.hasArg(clang::driver::options::OPT_fno_ppc_native_vec_elem_order)) {
+    res.loweringOpts.setNoPPCNativeVecElemOrder(true);
+  }
+
   success &= parseFrontendArgs(res.getFrontendOpts(), args, diags);
   parseTargetArgs(res.getTargetOpts(), args);
   parsePreprocessorArgs(res.getPreprocessorOpts(), args);
Index: flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
===================================================================
--- flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
+++ flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
@@ -135,6 +135,11 @@
   PPCIntrinsicLibrary() = delete;
   PPCIntrinsicLibrary(const PPCIntrinsicLibrary &) = delete;
 
+  // Helper functions for vector element ordering.
+  bool isBEVecElemOrderOnLE();
+  bool isNativeVecElemOrderOnLE();
+  bool changeVecElemOrder();
+
   // PPC MMA intrinsic generic handler
   template <MMAOp IntrId, MMAHandlerOp HandlerOp>
   void genMmaIntr(llvm::ArrayRef<fir::ExtendedValue>);
Index: flang/include/flang/Optimizer/Builder/IntrinsicCall.h
===================================================================
--- flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -9,6 +9,7 @@
 #ifndef FORTRAN_LOWER_INTRINSICCALL_H
 #define FORTRAN_LOWER_INTRINSICCALL_H
 
+#include "flang/Lower/AbstractConverter.h"
 #include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/Runtime/Character.h"
@@ -34,7 +35,8 @@
 std::pair<fir::ExtendedValue, bool>
 genIntrinsicCall(fir::FirOpBuilder &, mlir::Location, llvm::StringRef name,
                  std::optional<mlir::Type> resultType,
-                 llvm::ArrayRef<fir::ExtendedValue> args);
+                 llvm::ArrayRef<fir::ExtendedValue> args,
+                 Fortran::lower::AbstractConverter *converter = nullptr);
 
 /// Enums used to templatize and share lowering of MIN and MAX.
 enum class Extremum { Min, Max };
@@ -124,8 +126,10 @@
 struct IntrinsicLibrary {
 
   // Constructors.
-  explicit IntrinsicLibrary(fir::FirOpBuilder &builder, mlir::Location loc)
-      : builder{builder}, loc{loc} {}
+  explicit IntrinsicLibrary(
+      fir::FirOpBuilder &builder, mlir::Location loc,
+      Fortran::lower::AbstractConverter *converter = nullptr)
+      : builder{builder}, loc{loc}, converter{converter} {}
   IntrinsicLibrary() = delete;
   IntrinsicLibrary(const IntrinsicLibrary &) = delete;
 
@@ -416,6 +420,7 @@
   fir::FirOpBuilder &builder;
   mlir::Location loc;
   bool resultMustBeFreed = false;
+  Fortran::lower::AbstractConverter *converter = nullptr;
 };
 
 struct IntrinsicDummyArgument {
Index: flang/include/flang/Lower/LoweringOptions.def
===================================================================
--- flang/include/flang/Lower/LoweringOptions.def
+++ flang/include/flang/Lower/LoweringOptions.def
@@ -31,5 +31,8 @@
 /// Off by default until fully ready.
 ENUM_LOWERINGOPT(LowerToHighLevelFIR, unsigned, 1, 0)
 
+/// If true, reverse PowerPC native vector element order.
+ENUM_LOWERINGOPT(NoPPCNativeVecElemOrder, unsigned, 1, 0)
+
 #undef LOWERINGOPT
 #undef ENUM_LOWERINGOPT
Index: flang/include/flang/Lower/CustomIntrinsicCall.h
===================================================================
--- flang/include/flang/Lower/CustomIntrinsicCall.h
+++ flang/include/flang/Lower/CustomIntrinsicCall.h
@@ -103,11 +103,12 @@
 /// Generate the FIR+MLIR operations for the generic intrinsic \p name
 /// with argument \p args and expected result type \p resultType.
 /// Returned fir::ExtendedValue is the returned Fortran intrinsic value.
-fir::ExtendedValue genIntrinsicCall(fir::FirOpBuilder &builder,
-                                    mlir::Location loc, llvm::StringRef name,
-                                    std::optional<mlir::Type> resultType,
-                                    llvm::ArrayRef<fir::ExtendedValue> args,
-                                    StatementContext &stmtCtx);
+fir::ExtendedValue
+genIntrinsicCall(fir::FirOpBuilder &builder, mlir::Location loc,
+                 llvm::StringRef name, std::optional<mlir::Type> resultType,
+                 llvm::ArrayRef<fir::ExtendedValue> args,
+                 StatementContext &stmtCtx,
+                 Fortran::lower::AbstractConverter *converter = nullptr);
 
 } // namespace lower
 } // namespace Fortran
Index: clang/lib/Driver/ToolChains/Flang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Flang.cpp
+++ clang/lib/Driver/ToolChains/Flang.cpp
@@ -144,7 +144,9 @@
     CmdArgs.push_back("-fversion-loops-for-stride");
 
   Args.AddAllArgs(CmdArgs, {options::OPT_flang_experimental_hlfir,
-                            options::OPT_flang_experimental_polymorphism});
+                            options::OPT_flang_experimental_polymorphism,
+                            options::OPT_fno_ppc_native_vec_elem_order,
+                            options::OPT_fppc_native_vec_elem_order});
 }
 
 void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -5416,6 +5416,9 @@
 defm logical_abbreviations : OptInFC1FFlag<"logical-abbreviations", "Enable logical abbreviations">;
 defm implicit_none : OptInFC1FFlag<"implicit-none", "No implicit typing allowed unless overridden by IMPLICIT statements">;
 defm underscoring : OptInFC1FFlag<"underscoring", "Appends one trailing underscore to external names">;
+defm ppc_native_vec_elem_order: BoolOptionWithoutMarshalling<"f", "ppc-native-vector-element-order",
+  PosFlag<SetTrue, [], "Specifies PowerPC native vector element order (default)">,
+  NegFlag<SetFalse, [], "Specifies PowerPC non-native vector element order">>;
 
 def fno_automatic : Flag<["-"], "fno-automatic">, Group<f_Group>,
   HelpText<"Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE">;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to