[PATCH] D72110: [X86] ABI compat bugfix for MSVC vectorcall

2020-01-14 Thread Reid Kleckner via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG8e780252a728: [X86] ABI compat bugfix for MSVC vectorcall 
(authored by rnk).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72110/new/

https://reviews.llvm.org/D72110

Files:
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/vectorcall.c

Index: clang/test/CodeGen/vectorcall.c
===
--- clang/test/CodeGen/vectorcall.c
+++ clang/test/CodeGen/vectorcall.c
@@ -116,3 +116,24 @@
 // X32: define dso_local x86_vectorcallcc void @"\01HVAAnywhere@@88"(%struct.HFA2 inreg %p1.coerce, i32 inreg %p2, i32 inreg %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9)
 // X64: define dso_local x86_vectorcallcc void @"\01HVAAnywhere@@112"(%struct.HFA2 inreg %p1.coerce, i32 %p2, i32 %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9) 
 
+#ifndef __x86_64__
+// This covers the three ways XMM values can be passed on 32-bit x86:
+// - directly in XMM register (xmm5)
+// - indirectly by address, address in GPR (ecx)
+// - indirectly by address, address on stack
+void __vectorcall vectorcall_indirect_vec(
+double xmm0, double xmm1, double xmm2, double xmm3, double xmm4,
+v4f32 xmm5, v4f32 ecx, int edx, v4f32 mem) {
+}
+
+// X32: define dso_local x86_vectorcallcc void @"\01vectorcall_indirect_vec@@{{[0-9]+}}"
+// X32-SAME: (double %xmm0,
+// X32-SAME: double %xmm1,
+// X32-SAME: double %xmm2,
+// X32-SAME: double %xmm3,
+// X32-SAME: double %xmm4,
+// X32-SAME: <4 x float> %xmm5,
+// X32-SAME: <4 x float>* inreg %0,
+// X32-SAME: i32 inreg %edx,
+// X32-SAME: <4 x float>* %1)
+#endif
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -22,6 +22,7 @@
 #include "clang/Basic/CodeGenOptions.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/CodeGen/SwiftCallingConv.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
@@ -996,11 +997,13 @@
 
 /// Similar to llvm::CCState, but for Clang.
 struct CCState {
-  CCState(unsigned CC) : CC(CC), FreeRegs(0), FreeSSERegs(0) {}
+  CCState(CGFunctionInfo )
+  : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()) {}
 
-  unsigned CC;
-  unsigned FreeRegs;
-  unsigned FreeSSERegs;
+  llvm::SmallBitVector IsPreassigned;
+  unsigned CC = CallingConv::CC_C;
+  unsigned FreeRegs = 0;
+  unsigned FreeSSERegs = 0;
 };
 
 enum {
@@ -1071,8 +1074,7 @@
   void addFieldToArgStruct(SmallVector ,
CharUnits , ABIArgInfo ,
QualType Type) const;
-  void computeVectorCallArgs(CGFunctionInfo , CCState ,
- bool ) const;
+  void runVectorCallFirstPass(CGFunctionInfo , CCState ) const;
 
 public:
 
@@ -1640,9 +1642,38 @@
   return true;
 }
 
+void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo , CCState ) const {
+  // Vectorcall x86 works subtly different than in x64, so the format is
+  // a bit different than the x64 version.  First, all vector types (not HVAs)
+  // are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers.
+  // This differs from the x64 implementation, where the first 6 by INDEX get
+  // registers.
+  // In the second pass over the arguments, HVAs are passed in the remaining
+  // vector registers if possible, or indirectly by address. The address will be
+  // passed in ECX/EDX if available. Any other arguments are passed according to
+  // the usual fastcall rules.
+  MutableArrayRef Args = FI.arguments();
+  for (int I = 0, E = Args.size(); I < E; ++I) {
+const Type *Base = nullptr;
+uint64_t NumElts = 0;
+const QualType  = Args[I].type;
+if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
+isHomogeneousAggregate(Ty, Base, NumElts)) {
+  if (State.FreeSSERegs >= NumElts) {
+State.FreeSSERegs -= NumElts;
+Args[I].info = ABIArgInfo::getDirect();
+State.IsPreassigned.set(I);
+  }
+}
+  }
+}
+
 ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
CCState ) const {
   // FIXME: Set alignment on indirect arguments.
+  bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
+  bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
+  bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;
 
   Ty = useFirstFieldIfTransparentUnion(Ty);
 
@@ -1662,11 +1693,16 @@
   // to other targets.
   const Type *Base = nullptr;
   uint64_t NumElts = 0;
-  if (State.CC == llvm::CallingConv::X86_RegCall &&
+  if ((IsRegCall || IsVectorCall) &&
   isHomogeneousAggregate(Ty, Base, NumElts)) {

[PATCH] D72110: [X86] ABI compat bugfix for MSVC vectorcall

2020-01-05 Thread Craig Topper via Phabricator via cfe-commits
craig.topper accepted this revision.
craig.topper added a comment.
This revision is now accepted and ready to land.

LGTM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72110/new/

https://reviews.llvm.org/D72110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72110: [X86] ABI compat bugfix for MSVC vectorcall

2020-01-03 Thread pre-merge checks [bot] via Phabricator via cfe-commits
merge_guards_bot added a comment.

{icon check-circle color=green} Unit tests: pass. 61177 tests passed, 0 failed 
and 729 were skipped.

{icon times-circle color=red} clang-tidy: fail. Please fix clang-tidy findings 
.

{icon times-circle color=red} clang-format: fail. Please format your changes 
with clang-format by running `git-clang-format HEAD^` or applying this patch 
.

Build artifacts 
: 
diff.json 
,
 clang-tidy.txt 
,
 clang-format.patch 
,
 CMakeCache.txt 
,
 console-log.txt 
,
 test-results.xml 



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72110/new/

https://reviews.llvm.org/D72110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72110: [X86] ABI compat bugfix for MSVC vectorcall

2020-01-03 Thread Reid Kleckner via Phabricator via cfe-commits
rnk updated this revision to Diff 236115.
rnk added a comment.

- ZYXMM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72110/new/

https://reviews.llvm.org/D72110

Files:
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/vectorcall.c

Index: clang/test/CodeGen/vectorcall.c
===
--- clang/test/CodeGen/vectorcall.c
+++ clang/test/CodeGen/vectorcall.c
@@ -116,3 +116,24 @@
 // X32: define dso_local x86_vectorcallcc void @"\01HVAAnywhere@@88"(%struct.HFA2 inreg %p1.coerce, i32 inreg %p2, i32 inreg %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9)
 // X64: define dso_local x86_vectorcallcc void @"\01HVAAnywhere@@112"(%struct.HFA2 inreg %p1.coerce, i32 %p2, i32 %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9) 
 
+#ifndef __x86_64__
+// This covers the three ways XMM values can be passed on 32-bit x86:
+// - directly in XMM register (xmm5)
+// - indirectly by address, address in GPR (ecx)
+// - indirectly by address, address on stack
+void __vectorcall vectorcall_indirect_vec(
+double xmm0, double xmm1, double xmm2, double xmm3, double xmm4,
+v4f32 xmm5, v4f32 ecx, int edx, v4f32 mem) {
+}
+
+// X32: define dso_local x86_vectorcallcc void @"\01vectorcall_indirect_vec@@{{[0-9]+}}"
+// X32-SAME: (double %xmm0,
+// X32-SAME: double %xmm1,
+// X32-SAME: double %xmm2,
+// X32-SAME: double %xmm3,
+// X32-SAME: double %xmm4,
+// X32-SAME: <4 x float> %xmm5,
+// X32-SAME: <4 x float>* inreg %0,
+// X32-SAME: i32 inreg %edx,
+// X32-SAME: <4 x float>* %1)
+#endif
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -22,6 +22,7 @@
 #include "clang/Basic/CodeGenOptions.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/CodeGen/SwiftCallingConv.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
@@ -996,11 +997,13 @@
 
 /// Similar to llvm::CCState, but for Clang.
 struct CCState {
-  CCState(unsigned CC) : CC(CC), FreeRegs(0), FreeSSERegs(0) {}
+  CCState(CGFunctionInfo )
+  : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()) {}
 
-  unsigned CC;
-  unsigned FreeRegs;
-  unsigned FreeSSERegs;
+  llvm::SmallBitVector IsPreassigned;
+  unsigned CC = CallingConv::CC_C;
+  unsigned FreeRegs = 0;
+  unsigned FreeSSERegs = 0;
 };
 
 enum {
@@ -1071,8 +1074,7 @@
   void addFieldToArgStruct(SmallVector ,
CharUnits , ABIArgInfo ,
QualType Type) const;
-  void computeVectorCallArgs(CGFunctionInfo , CCState ,
- bool ) const;
+  void runVectorCallFirstPass(CGFunctionInfo , CCState ) const;
 
 public:
 
@@ -1640,9 +1642,38 @@
   return true;
 }
 
+void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo , CCState ) const {
+  // Vectorcall x86 works subtly different than in x64, so the format is
+  // a bit different than the x64 version.  First, all vector types (not HVAs)
+  // are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers.
+  // This differs from the x64 implementation, where the first 6 by INDEX get
+  // registers.
+  // In the second pass over the arguments, HVAs are passed in the remaining
+  // vector registers if possible, or indirectly by address. The address will be
+  // passed in ECX/EDX if available. Any other arguments are passed according to
+  // the usual fastcall rules.
+  MutableArrayRef Args = FI.arguments();
+  for (int I = 0, E = Args.size(); I < E; ++I) {
+const Type *Base = nullptr;
+uint64_t NumElts = 0;
+const QualType  = Args[I].type;
+if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
+isHomogeneousAggregate(Ty, Base, NumElts)) {
+  if (State.FreeSSERegs >= NumElts) {
+State.FreeSSERegs -= NumElts;
+Args[I].info = ABIArgInfo::getDirect();
+State.IsPreassigned.set(I);
+  }
+}
+  }
+}
+
 ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
CCState ) const {
   // FIXME: Set alignment on indirect arguments.
+  bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
+  bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
+  bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;
 
   Ty = useFirstFieldIfTransparentUnion(Ty);
 
@@ -1662,11 +1693,16 @@
   // to other targets.
   const Type *Base = nullptr;
   uint64_t NumElts = 0;
-  if (State.CC == llvm::CallingConv::X86_RegCall &&
+  if ((IsRegCall || IsVectorCall) &&
   isHomogeneousAggregate(Ty, Base, NumElts)) {
-
 if (State.FreeSSERegs >= NumElts) {
   State.FreeSSERegs -= NumElts;
+
+  // 

[PATCH] D72110: [X86] ABI compat bugfix for MSVC vectorcall

2020-01-03 Thread Reid Kleckner via Phabricator via cfe-commits
rnk marked an inline comment as done.
rnk added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:1648
+  // a bit different than the x64 version.  First, all vector types (not HVAs)
+  // are assigned, with the first 6 ending up in the YMM0-5 or XMM0-5 
registers.
+  // This differs from the x64 implementation, where the first 6 by INDEX get

craig.topper wrote:
> What about ZMM?
This comment was pre-existing, but we can say [XYZ]MM0-5.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72110/new/

https://reviews.llvm.org/D72110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72110: [X86] ABI compat bugfix for MSVC vectorcall

2020-01-03 Thread Craig Topper via Phabricator via cfe-commits
craig.topper added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:1648
+  // a bit different than the x64 version.  First, all vector types (not HVAs)
+  // are assigned, with the first 6 ending up in the YMM0-5 or XMM0-5 
registers.
+  // This differs from the x64 implementation, where the first 6 by INDEX get

What about ZMM?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72110/new/

https://reviews.llvm.org/D72110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72110: [X86] ABI compat bugfix for MSVC vectorcall

2020-01-03 Thread Erich Keane via Phabricator via cfe-commits
erichkeane added a comment.

I don't see anything I have a problem with, but still want @ctopper to have a 
bit of time to take a look.  Ping me monday on both if he doesn't respond.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72110/new/

https://reviews.llvm.org/D72110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72110: [X86] ABI compat bugfix for MSVC vectorcall

2020-01-02 Thread pre-merge checks [bot] via Phabricator via cfe-commits
merge_guards_bot added a comment.

{icon check-circle color=green} Unit tests: pass. 61175 tests passed, 0 failed 
and 729 were skipped.

{icon check-circle color=green} clang-tidy: pass.

{icon times-circle color=red} clang-format: fail. Please format your changes 
with clang-format by running `git-clang-format HEAD^` or applying this patch 
.

Build artifacts 
: 
diff.json 
,
 clang-tidy.txt 
,
 clang-format.patch 
,
 CMakeCache.txt 
,
 console-log.txt 
,
 test-results.xml 



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72110/new/

https://reviews.llvm.org/D72110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72110: [X86] ABI compat bugfix for MSVC vectorcall

2020-01-02 Thread Reid Kleckner via Phabricator via cfe-commits
rnk created this revision.
rnk added reviewers: erichkeane, craig.topper.
Herald added a project: clang.

Before this change, X86_32ABIInfo::classifyArgument would be called
twice on vector arguments to vectorcall functions. This function has
side effects to track GPR register usage, and this would lead to
incorrect GPR usage in some cases.  The specific case I noticed is from
running out of XMM registers with mixed FP and vector arguments and no
aggregates of any kind. Consider this prototype:

  void __vectorcall vectorcall_indirect_vec(
  double xmm0, double xmm1, double xmm2, double xmm3, double xmm4,
  __m128 xmm5,
  __m128 ecx,
  int edx,
  __m128 mem);

classifyArgument has no effects when called on a plain FP type, but when
called on a vector type, it modifies FreeRegs to model GPR consumption.
However, this should not happen during the vector call first pass.

I refactored the code to unify vectorcall HVA logic with regcall HVA
logic. The conventions pass HVAs in registers differently (expanded vs.
not expanded), but if they do not fit in registers, they both pass them
indirectly by address.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D72110

Files:
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/vectorcall.c

Index: clang/test/CodeGen/vectorcall.c
===
--- clang/test/CodeGen/vectorcall.c
+++ clang/test/CodeGen/vectorcall.c
@@ -116,3 +116,24 @@
 // X32: define dso_local x86_vectorcallcc void @"\01HVAAnywhere@@88"(%struct.HFA2 inreg %p1.coerce, i32 inreg %p2, i32 inreg %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9)
 // X64: define dso_local x86_vectorcallcc void @"\01HVAAnywhere@@112"(%struct.HFA2 inreg %p1.coerce, i32 %p2, i32 %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9) 
 
+#ifndef __x86_64__
+// This covers the three ways XMM values can be passed on 32-bit x86:
+// - directly in XMM register (xmm5)
+// - indirectly by address, address in GPR (ecx)
+// - indirectly by address, address on stack
+void __vectorcall vectorcall_indirect_vec(
+double xmm0, double xmm1, double xmm2, double xmm3, double xmm4,
+v4f32 xmm5, v4f32 ecx, int edx, v4f32 mem) {
+}
+
+// X32: define dso_local x86_vectorcallcc void @"\01vectorcall_indirect_vec@@{{[0-9]+}}"
+// X32-SAME: (double %xmm0,
+// X32-SAME: double %xmm1,
+// X32-SAME: double %xmm2,
+// X32-SAME: double %xmm3,
+// X32-SAME: double %xmm4,
+// X32-SAME: <4 x float> %xmm5,
+// X32-SAME: <4 x float>* inreg %0,
+// X32-SAME: i32 inreg %edx,
+// X32-SAME: <4 x float>* %1)
+#endif
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -22,6 +22,7 @@
 #include "clang/Basic/CodeGenOptions.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/CodeGen/SwiftCallingConv.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
@@ -996,11 +997,13 @@
 
 /// Similar to llvm::CCState, but for Clang.
 struct CCState {
-  CCState(unsigned CC) : CC(CC), FreeRegs(0), FreeSSERegs(0) {}
+  CCState(CGFunctionInfo )
+  : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()) {}
 
-  unsigned CC;
-  unsigned FreeRegs;
-  unsigned FreeSSERegs;
+  llvm::SmallBitVector IsPreassigned;
+  unsigned CC = CallingConv::CC_C;
+  unsigned FreeRegs = 0;
+  unsigned FreeSSERegs = 0;
 };
 
 enum {
@@ -1071,8 +1074,7 @@
   void addFieldToArgStruct(SmallVector ,
CharUnits , ABIArgInfo ,
QualType Type) const;
-  void computeVectorCallArgs(CGFunctionInfo , CCState ,
- bool ) const;
+  void runVectorCallFirstPass(CGFunctionInfo , CCState ) const;
 
 public:
 
@@ -1640,9 +1642,38 @@
   return true;
 }
 
+void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo , CCState ) const {
+  // Vectorcall x86 works subtly different than in x64, so the format is
+  // a bit different than the x64 version.  First, all vector types (not HVAs)
+  // are assigned, with the first 6 ending up in the YMM0-5 or XMM0-5 registers.
+  // This differs from the x64 implementation, where the first 6 by INDEX get
+  // registers.
+  // In the second pass over the arguments, HVAs are passed in the remaining
+  // vector registers if possible, or indirectly by address. The address will be
+  // passed in ECX/EDX if available. Any other arguments are passed according to
+  // the usual fastcall rules.
+  MutableArrayRef Args = FI.arguments();
+  for (int I = 0, E = Args.size(); I < E; ++I) {
+const Type *Base = nullptr;
+uint64_t NumElts = 0;
+const QualType  = Args[I].type;
+if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
+