[PATCH] D154007: Reland "Try to implement lambdas with inalloca parameters by forwarding without use of inallocas."

2023-07-26 Thread Amy Huang via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG27dab4d305ac: Reland Try to implement lambdas with 
inalloca parameters by forwarding without… (authored by akhuang).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154007/new/

https://reviews.llvm.org/D154007

Files:
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGCall.h
  clang/lib/CodeGen/CGClass.cpp
  clang/lib/CodeGen/CGDeclCXX.cpp
  clang/lib/CodeGen/CodeGenABITypes.cpp
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/CodeGen/CodeGenTypes.h
  clang/lib/CodeGen/Targets/X86.cpp
  clang/test/CodeGenCXX/inalloca-lambda.cpp

Index: clang/test/CodeGenCXX/inalloca-lambda.cpp
===
--- clang/test/CodeGenCXX/inalloca-lambda.cpp
+++ clang/test/CodeGenCXX/inalloca-lambda.cpp
@@ -1,11 +1,63 @@
-// RUN: not %clang_cc1 -triple i686-windows-msvc -emit-llvm -o /dev/null %s  2>&1 | FileCheck %s
+// RUN: %clang_cc1 -triple i686-windows-msvc -emit-llvm -o - %s  2>&1 | FileCheck %s
 
-// PR28299
-// CHECK: error: cannot compile this forwarded non-trivially copyable parameter yet
-
-class A {
+struct A {
+  A();
   A(const A &);
+  int x;
 };
-typedef void (*fptr_t)(A);
-fptr_t fn1() { return [](A) {}; }
+void decayToFp(int (*f)(A));
+void test() {
+  auto ld = [](A a) {
+static int calls = 0;
+++calls;
+return a.x + calls;
+  };
+  decayToFp(ld);
+  ld(A{});
+}
+
+// CHECK: define internal x86_thiscallcc noundef i32
+// CHECK-SAME: @"??R@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %this, ptr inalloca(<{ %struct.A }>) %[[ARG:.*]])
+// CHECK: %[[V:.*]] = getelementptr inbounds <{ %struct.A }>, ptr %[[ARG]], i32 0, i32 0
+// CHECK: %call = call x86_thiscallcc noundef i32
+// CHECK-SAME: @"?__impl@@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %this, ptr noundef %[[V]])
+
+// CHECK: define internal noundef i32
+// CHECK-SAME: @"?__invoke@@?0??test@@YAXXZ@CA?A?@@UA@@@Z"
+// CHECK-SAME: (ptr inalloca(<{ %struct.A }>) %[[ARG:.*]])
+// CHECK: %unused.capture = alloca %class.anon, align 1
+// CHECK: %[[VAR:.*]] = getelementptr inbounds <{ %struct.A }>, ptr %[[ARG]], i32 0, i32 0
+// CHECK: %call = call x86_thiscallcc noundef i32
+// CHECK-SAME: @"?__impl@@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %unused.capture, ptr noundef %[[VAR]])
+// CHECK: ret i32 %call
+
+// CHECK: define internal x86_thiscallcc noundef i32
+// CHECK-SAME: @"?__impl@@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %this, ptr noundef %[[ARG:.*]])
+// CHECK: %this.addr = alloca ptr, align 4
+// CHECK: store ptr %this, ptr %this.addr, align 4
+// CHECK: %this1 = load ptr, ptr %this.addr, align 4
+// CHECK: %{{.*}} = load i32, ptr @"?calls@?1???R
+// CHECK: %inc = add nsw i32 %{{.*}}, 1
+// CHECK: store i32 %inc, ptr @"?calls@?1???R
+// CHECK: %{{.*}} = getelementptr inbounds %struct.A, ptr %{{.*}}, i32 0, i32 0
+// CHECK: %{{.*}} = load i32, ptr %{{.*}}, align 4
+// CHECK: %{{.*}} = load i32, ptr @"?calls@?1???R
+// CHECK: %add = add nsw i32 %{{.*}}, %{{.*}}
+// CHECK: ret i32 %add
+
+// Make sure we don't try to copy an uncopyable type.
+struct B {
+  B();
+  B(B &);
+  void operator=(B);
+  long long x;
+} b;
+
+void f() {
+  [](B) {}(b);
+}
 
Index: clang/lib/CodeGen/Targets/X86.cpp
===
--- clang/lib/CodeGen/Targets/X86.cpp
+++ clang/lib/CodeGen/Targets/X86.cpp
@@ -140,7 +140,8 @@
 
   Class classify(QualType Ty) const;
   ABIArgInfo classifyReturnType(QualType RetTy, CCState ) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, CCState ) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy, CCState ,
+  bool isDelegateCall) const;
 
   /// Updates the number of available free registers, returns
   /// true if any registers were allocated.
@@ -737,8 +738,8 @@
   }
 }
 
-ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
-   CCState ) const {
+ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState ,
+   bool isDelegateCall) const {
   // FIXME: Set alignment on indirect arguments.
   bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
   bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
@@ -753,6 +754,12 @@
 CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
 if (RAA == CGCXXABI::RAA_Indirect) {
   return getIndirectResult(Ty, false, State);
+} else if (isDelegateCall) {
+  // Avoid having different alignments on delegate call args by always
+  // setting the alignment to 4, which is what we do for inallocas.
+  ABIArgInfo Res = getIndirectResult(Ty, false, 

[PATCH] D137872: Implement lambdas with inalloca parameters by forwarding to function without inalloca calling convention.

2023-06-20 Thread Amy Huang via Phabricator via cfe-commits
akhuang updated this revision to Diff 533079.
akhuang added a comment.

rebase and clang format


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D137872/new/

https://reviews.llvm.org/D137872

Files:
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGCall.h
  clang/lib/CodeGen/CGClass.cpp
  clang/lib/CodeGen/CGDeclCXX.cpp
  clang/lib/CodeGen/CodeGenABITypes.cpp
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/CodeGen/CodeGenTypes.h
  clang/lib/CodeGen/Targets/X86.cpp
  clang/test/CodeGenCXX/inalloca-lambda.cpp

Index: clang/test/CodeGenCXX/inalloca-lambda.cpp
===
--- clang/test/CodeGenCXX/inalloca-lambda.cpp
+++ clang/test/CodeGenCXX/inalloca-lambda.cpp
@@ -1,11 +1,50 @@
-// RUN: not %clang_cc1 -triple i686-windows-msvc -emit-llvm -o /dev/null %s  2>&1 | FileCheck %s
+// RUN: %clang_cc1 -triple i686-windows-msvc -emit-llvm -o - %s  2>&1 | FileCheck %s
 
-// PR28299
-// CHECK: error: cannot compile this forwarded non-trivially copyable parameter yet
-
-class A {
+struct A {
+  A();
   A(const A &);
+  int x;
 };
-typedef void (*fptr_t)(A);
-fptr_t fn1() { return [](A) {}; }
+void decayToFp(int (*f)(A));
+void test() {
+  auto ld = [](A a) {
+static int calls = 0;
+++calls;
+return a.x + calls;
+  };
+  decayToFp(ld);
+  ld(A{});
+}
+
+// CHECK: define internal x86_thiscallcc noundef i32 
+// CHECK-SAME: @"??R@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %this, ptr inalloca(<{ %struct.A }>) %[[ARG:.*]])
+// CHECK: %[[V:.*]] = getelementptr inbounds <{ %struct.A }>, ptr %[[ARG]], i32 0, i32 0
+// CHECK: %call = call x86_thiscallcc noundef i32 
+// CHECK-SAME: @"?__impl@@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %this, ptr noundef %[[V]])
+
+// CHECK: define internal noundef i32
+// CHECK-SAME: @"?__invoke@@?0??test@@YAXXZ@CA?A?@@UA@@@Z"
+// CHECK-SAME: (ptr inalloca(<{ %struct.A }>) %[[ARG:.*]])
+// CHECK: %unused.capture = alloca %class.anon, align 1
+// CHECK: %[[VAR:.*]] = getelementptr inbounds <{ %struct.A }>, ptr %[[ARG]], i32 0, i32 0
+// CHECK: %call = call x86_thiscallcc noundef i32
+// CHECK-SAME: @"?__impl@@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %unused.capture, ptr noundef %[[VAR]])
+// CHECK: ret i32 %call 
 
+// CHECK: define internal x86_thiscallcc noundef i32
+// CHECK-SAME: @"?__impl@@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %this, ptr noundef %[[ARG:.*]])
+// CHECK: %this.addr = alloca ptr, align 4
+// CHECK: store ptr %this, ptr %this.addr, align 4
+// CHECK: %this1 = load ptr, ptr %this.addr, align 4
+// CHECK: %{{.*}} = load i32, ptr @"?calls@?1???R
+// CHECK: %inc = add nsw i32 %{{.*}}, 1
+// CHECK: store i32 %inc, ptr @"?calls@?1???R
+// CHECK: %{{.*}} = getelementptr inbounds %struct.A, ptr %{{.*}}, i32 0, i32 0
+// CHECK: %{{.*}} = load i32, ptr %{{.*}}, align 4
+// CHECK: %{{.*}} = load i32, ptr @"?calls@?1???R
+// CHECK: %add = add nsw i32 %{{.*}}, %{{.*}}
+// CHECK: ret i32 %add
Index: clang/lib/CodeGen/Targets/X86.cpp
===
--- clang/lib/CodeGen/Targets/X86.cpp
+++ clang/lib/CodeGen/Targets/X86.cpp
@@ -140,7 +140,8 @@
 
   Class classify(QualType Ty) const;
   ABIArgInfo classifyReturnType(QualType RetTy, CCState ) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, CCState ) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy, CCState ,
+  bool isDelegateCall) const;
 
   /// Updates the number of available free registers, returns
   /// true if any registers were allocated.
@@ -738,8 +739,8 @@
   }
 }
 
-ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
-   CCState ) const {
+ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState ,
+   bool isDelegateCall) const {
   // FIXME: Set alignment on indirect arguments.
   bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
   bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
@@ -752,7 +753,7 @@
   const RecordType *RT = Ty->getAs();
   if (RT) {
 CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
-if (RAA == CGCXXABI::RAA_Indirect) {
+if (RAA == CGCXXABI::RAA_Indirect || isDelegateCall) {
   return getIndirectResult(Ty, false, State);
 } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
   // The field index doesn't matter, we'll fix it up later.
@@ -941,7 +942,8 @@
 if (State.IsPreassigned.test(I))
   continue;
 
-Args[I].info = classifyArgumentType(Args[I].type, State);
+Args[I].info =
+classifyArgumentType(Args[I].type, State, FI.isDelegateCall());
 UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca);
   }
 
Index: 

[PATCH] D137872: Implement lambdas with inalloca parameters by forwarding to function without inalloca calling convention.

2023-06-20 Thread Amy Huang via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG015049338d7e: Try to implement lambdas with inalloca 
parameters by forwarding without use of… (authored by akhuang).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D137872/new/

https://reviews.llvm.org/D137872

Files:
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGCall.h
  clang/lib/CodeGen/CGClass.cpp
  clang/lib/CodeGen/CGDeclCXX.cpp
  clang/lib/CodeGen/CodeGenABITypes.cpp
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/CodeGen/CodeGenTypes.h
  clang/lib/CodeGen/Targets/X86.cpp
  clang/test/CodeGenCXX/inalloca-lambda.cpp

Index: clang/test/CodeGenCXX/inalloca-lambda.cpp
===
--- clang/test/CodeGenCXX/inalloca-lambda.cpp
+++ clang/test/CodeGenCXX/inalloca-lambda.cpp
@@ -1,11 +1,50 @@
-// RUN: not %clang_cc1 -triple i686-windows-msvc -emit-llvm -o /dev/null %s  2>&1 | FileCheck %s
+// RUN: %clang_cc1 -triple i686-windows-msvc -emit-llvm -o - %s  2>&1 | FileCheck %s
 
-// PR28299
-// CHECK: error: cannot compile this forwarded non-trivially copyable parameter yet
-
-class A {
+struct A {
+  A();
   A(const A &);
+  int x;
 };
-typedef void (*fptr_t)(A);
-fptr_t fn1() { return [](A) {}; }
+void decayToFp(int (*f)(A));
+void test() {
+  auto ld = [](A a) {
+static int calls = 0;
+++calls;
+return a.x + calls;
+  };
+  decayToFp(ld);
+  ld(A{});
+}
+
+// CHECK: define internal x86_thiscallcc noundef i32 
+// CHECK-SAME: @"??R@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %this, ptr inalloca(<{ %struct.A }>) %[[ARG:.*]])
+// CHECK: %[[V:.*]] = getelementptr inbounds <{ %struct.A }>, ptr %[[ARG]], i32 0, i32 0
+// CHECK: %call = call x86_thiscallcc noundef i32 
+// CHECK-SAME: @"?__impl@@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %this, ptr noundef %[[V]])
+
+// CHECK: define internal noundef i32
+// CHECK-SAME: @"?__invoke@@?0??test@@YAXXZ@CA?A?@@UA@@@Z"
+// CHECK-SAME: (ptr inalloca(<{ %struct.A }>) %[[ARG:.*]])
+// CHECK: %unused.capture = alloca %class.anon, align 1
+// CHECK: %[[VAR:.*]] = getelementptr inbounds <{ %struct.A }>, ptr %[[ARG]], i32 0, i32 0
+// CHECK: %call = call x86_thiscallcc noundef i32
+// CHECK-SAME: @"?__impl@@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %unused.capture, ptr noundef %[[VAR]])
+// CHECK: ret i32 %call 
 
+// CHECK: define internal x86_thiscallcc noundef i32
+// CHECK-SAME: @"?__impl@@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %this, ptr noundef %[[ARG:.*]])
+// CHECK: %this.addr = alloca ptr, align 4
+// CHECK: store ptr %this, ptr %this.addr, align 4
+// CHECK: %this1 = load ptr, ptr %this.addr, align 4
+// CHECK: %{{.*}} = load i32, ptr @"?calls@?1???R
+// CHECK: %inc = add nsw i32 %{{.*}}, 1
+// CHECK: store i32 %inc, ptr @"?calls@?1???R
+// CHECK: %{{.*}} = getelementptr inbounds %struct.A, ptr %{{.*}}, i32 0, i32 0
+// CHECK: %{{.*}} = load i32, ptr %{{.*}}, align 4
+// CHECK: %{{.*}} = load i32, ptr @"?calls@?1???R
+// CHECK: %add = add nsw i32 %{{.*}}, %{{.*}}
+// CHECK: ret i32 %add
Index: clang/lib/CodeGen/Targets/X86.cpp
===
--- clang/lib/CodeGen/Targets/X86.cpp
+++ clang/lib/CodeGen/Targets/X86.cpp
@@ -140,7 +140,8 @@
 
   Class classify(QualType Ty) const;
   ABIArgInfo classifyReturnType(QualType RetTy, CCState ) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, CCState ) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy, CCState ,
+  bool isDelegateCall) const;
 
   /// Updates the number of available free registers, returns
   /// true if any registers were allocated.
@@ -738,8 +739,8 @@
   }
 }
 
-ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
-   CCState ) const {
+ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState ,
+   bool isDelegateCall) const {
   // FIXME: Set alignment on indirect arguments.
   bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
   bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
@@ -752,7 +753,7 @@
   const RecordType *RT = Ty->getAs();
   if (RT) {
 CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
-if (RAA == CGCXXABI::RAA_Indirect) {
+if (RAA == CGCXXABI::RAA_Indirect || isDelegateCall) {
   return getIndirectResult(Ty, false, State);
 } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
   // The field index doesn't matter, we'll fix it up later.
@@ -941,7 +942,8 @@
 if (State.IsPreassigned.test(I))
   continue;
 
-Args[I].info = classifyArgumentType(Args[I].type, State);
+Args[I].info =
+

[PATCH] D154007: Reland "Try to implement lambdas with inalloca parameters by forwarding without use of inallocas."

2023-06-28 Thread Amy Huang via Phabricator via cfe-commits
akhuang added inline comments.



Comment at: clang/lib/CodeGen/CGCall.cpp:5103
 
-if (Addr.getAlignment() < Align &&
+if (CallInfo.isDelegateCall()) {
+  NeedCopy = false;

rnk wrote:
> akhuang wrote:
> > I think the problem is that it tries to do a copy here because the 
> > alignment of the forwarding function arg is larger than the alignment of 
> > the object that's being passed. I'm not sure how alignments are computed or 
> > if there are any other requirements for alignment. Is it ok to just ignore 
> > the new alignment? Do we need to change the code that computes the argument 
> > alignment?
> > 
> > (crbug.com/1457256#comment2 has an example repro)
> Yes, in general, structs with doubles and i64 members are passed misaligned 
> on i686. This is true for all functions, not just lambdas. We should power 
> down whatever alignment logic is causing the copy.
huh, ok, good to know. 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154007/new/

https://reviews.llvm.org/D154007

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154007: Reland "Try to implement lambdas with inalloca parameters by forwarding without use of inallocas."

2023-06-28 Thread Amy Huang via Phabricator via cfe-commits
akhuang created this revision.
Herald added a project: All.
akhuang requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

This reverts commit 8ed7aa59f489715d39d32e72a787b8e75cfda151 
.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D154007

Files:
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGCall.h
  clang/lib/CodeGen/CGClass.cpp
  clang/lib/CodeGen/CGDeclCXX.cpp
  clang/lib/CodeGen/CodeGenABITypes.cpp
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/CodeGen/CodeGenTypes.h
  clang/lib/CodeGen/Targets/X86.cpp
  clang/test/CodeGenCXX/inalloca-lambda.cpp

Index: clang/test/CodeGenCXX/inalloca-lambda.cpp
===
--- clang/test/CodeGenCXX/inalloca-lambda.cpp
+++ clang/test/CodeGenCXX/inalloca-lambda.cpp
@@ -1,11 +1,50 @@
-// RUN: not %clang_cc1 -triple i686-windows-msvc -emit-llvm -o /dev/null %s  2>&1 | FileCheck %s
+// RUN: %clang_cc1 -triple i686-windows-msvc -emit-llvm -o - %s  2>&1 | FileCheck %s
 
-// PR28299
-// CHECK: error: cannot compile this forwarded non-trivially copyable parameter yet
-
-class A {
+struct A {
+  A();
   A(const A &);
+  int x;
 };
-typedef void (*fptr_t)(A);
-fptr_t fn1() { return [](A) {}; }
+void decayToFp(int (*f)(A));
+void test() {
+  auto ld = [](A a) {
+static int calls = 0;
+++calls;
+return a.x + calls;
+  };
+  decayToFp(ld);
+  ld(A{});
+}
+
+// CHECK: define internal x86_thiscallcc noundef i32 
+// CHECK-SAME: @"??R@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %this, ptr inalloca(<{ %struct.A }>) %[[ARG:.*]])
+// CHECK: %[[V:.*]] = getelementptr inbounds <{ %struct.A }>, ptr %[[ARG]], i32 0, i32 0
+// CHECK: %call = call x86_thiscallcc noundef i32 
+// CHECK-SAME: @"?__impl@@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %this, ptr noundef %[[V]])
+
+// CHECK: define internal noundef i32
+// CHECK-SAME: @"?__invoke@@?0??test@@YAXXZ@CA?A?@@UA@@@Z"
+// CHECK-SAME: (ptr inalloca(<{ %struct.A }>) %[[ARG:.*]])
+// CHECK: %unused.capture = alloca %class.anon, align 1
+// CHECK: %[[VAR:.*]] = getelementptr inbounds <{ %struct.A }>, ptr %[[ARG]], i32 0, i32 0
+// CHECK: %call = call x86_thiscallcc noundef i32
+// CHECK-SAME: @"?__impl@@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %unused.capture, ptr noundef %[[VAR]])
+// CHECK: ret i32 %call 
 
+// CHECK: define internal x86_thiscallcc noundef i32
+// CHECK-SAME: @"?__impl@@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %this, ptr noundef %[[ARG:.*]])
+// CHECK: %this.addr = alloca ptr, align 4
+// CHECK: store ptr %this, ptr %this.addr, align 4
+// CHECK: %this1 = load ptr, ptr %this.addr, align 4
+// CHECK: %{{.*}} = load i32, ptr @"?calls@?1???R
+// CHECK: %inc = add nsw i32 %{{.*}}, 1
+// CHECK: store i32 %inc, ptr @"?calls@?1???R
+// CHECK: %{{.*}} = getelementptr inbounds %struct.A, ptr %{{.*}}, i32 0, i32 0
+// CHECK: %{{.*}} = load i32, ptr %{{.*}}, align 4
+// CHECK: %{{.*}} = load i32, ptr @"?calls@?1???R
+// CHECK: %add = add nsw i32 %{{.*}}, %{{.*}}
+// CHECK: ret i32 %add
Index: clang/lib/CodeGen/Targets/X86.cpp
===
--- clang/lib/CodeGen/Targets/X86.cpp
+++ clang/lib/CodeGen/Targets/X86.cpp
@@ -140,7 +140,8 @@
 
   Class classify(QualType Ty) const;
   ABIArgInfo classifyReturnType(QualType RetTy, CCState ) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, CCState ) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy, CCState ,
+  bool isDelegateCall) const;
 
   /// Updates the number of available free registers, returns
   /// true if any registers were allocated.
@@ -738,8 +739,8 @@
   }
 }
 
-ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
-   CCState ) const {
+ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState ,
+   bool isDelegateCall) const {
   // FIXME: Set alignment on indirect arguments.
   bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
   bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
@@ -752,7 +753,7 @@
   const RecordType *RT = Ty->getAs();
   if (RT) {
 CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
-if (RAA == CGCXXABI::RAA_Indirect) {
+if (RAA == CGCXXABI::RAA_Indirect || isDelegateCall) {
   return getIndirectResult(Ty, false, State);
 } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
   // The field index doesn't matter, we'll fix it up later.
@@ -941,7 +942,8 @@
 if (State.IsPreassigned.test(I))
   continue;
 
-Args[I].info = classifyArgumentType(Args[I].type, State);
+Args[I].info =
+

[PATCH] D154007: Reland "Try to implement lambdas with inalloca parameters by forwarding without use of inallocas."

2023-06-28 Thread Amy Huang via Phabricator via cfe-commits
akhuang added a comment.

In D154007#4457561 , @efriedma wrote:

> I'm not confident that isUsed() works the way you want it to in this context. 
>  In particular, if the code in question runs before we've translated the 
> whole translation unit, the isUsed() bit could change.  If you want that's 
> more obviously safe, you could just check if there are any captures.  (I'm 
> assuming the point of this is just to reduce the number of lambdas that go 
> through this codepath?)

Oh, thanks. Yeah, the point is we don't have to go down this path if we never 
have to emit the invoker.

In D154007#4457592 , @rnk wrote:

> Can you please add a test case for the issue that caused the revert? I wanted 
> to dig into that to try to understand why the extra copy was being emitted. I 
> think you mentioned it has something to do with increasing the alignment.

oops, I added comments when I put up the patch but never submitted them.. 
there's a repro in crbug.com/1457256 but I'll also add a test case




Comment at: clang/lib/CodeGen/CGCall.cpp:5103
 
-if (Addr.getAlignment() < Align &&
+if (CallInfo.isDelegateCall()) {
+  NeedCopy = false;

I think the problem is that it tries to do a copy here because the alignment of 
the forwarding function arg is larger than the alignment of the object that's 
being passed. I'm not sure how alignments are computed or if there are any 
other requirements for alignment. Is it ok to just ignore the new alignment? Do 
we need to change the code that computes the argument alignment?

(crbug.com/1457256#comment2 has an example repro)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154007/new/

https://reviews.llvm.org/D154007

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D137872: Implement lambdas with inalloca parameters by forwarding to function without inalloca calling convention.

2023-06-16 Thread Amy Huang via Phabricator via cfe-commits
akhuang added inline comments.



Comment at: clang/lib/CodeGen/CodeGenFunction.cpp:1470
+->getLambdaStaticInvoker()) &&
+ !Fn->getName().contains("__impl")) {
+// If emitting a lambda with static invoker on X86 Windows, change

efriedma wrote:
> I'm not sure `__impl` is unique enough that user code will never use it.  (I 
> mean, it's reserved, but we don't actually forbid using it.)
> 
> Can we use the isDelegateCall() bit?
yep, that makes more sense. 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D137872/new/

https://reviews.llvm.org/D137872

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D137872: Implement lambdas with inalloca parameters by forwarding to function without inalloca calling convention.

2023-06-16 Thread Amy Huang via Phabricator via cfe-commits
akhuang updated this revision to Diff 532291.
akhuang added a comment.

Use isDelegateCall to check whether function is actual call op fn or not


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D137872/new/

https://reviews.llvm.org/D137872

Files:
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGCall.h
  clang/lib/CodeGen/CGClass.cpp
  clang/lib/CodeGen/CGDeclCXX.cpp
  clang/lib/CodeGen/CodeGenABITypes.cpp
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/CodeGen/CodeGenTypes.h
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGenCXX/inalloca-lambda.cpp

Index: clang/test/CodeGenCXX/inalloca-lambda.cpp
===
--- clang/test/CodeGenCXX/inalloca-lambda.cpp
+++ clang/test/CodeGenCXX/inalloca-lambda.cpp
@@ -1,11 +1,50 @@
-// RUN: not %clang_cc1 -triple i686-windows-msvc -emit-llvm -o /dev/null %s  2>&1 | FileCheck %s
+// RUN: %clang_cc1 -triple i686-windows-msvc -emit-llvm -o - %s  2>&1 | FileCheck %s
 
-// PR28299
-// CHECK: error: cannot compile this forwarded non-trivially copyable parameter yet
-
-class A {
+struct A {
+  A();
   A(const A &);
+  int x;
 };
-typedef void (*fptr_t)(A);
-fptr_t fn1() { return [](A) {}; }
+void decayToFp(int (*f)(A));
+void test() {
+  auto ld = [](A a) {
+static int calls = 0;
+++calls;
+return a.x + calls;
+  };
+  decayToFp(ld);
+  ld(A{});
+}
+
+// CHECK: define internal x86_thiscallcc noundef i32 
+// CHECK-SAME: @"??R@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %this, ptr inalloca(<{ %struct.A }>) %[[ARG:.*]])
+// CHECK: %[[V:.*]] = getelementptr inbounds <{ %struct.A }>, ptr %[[ARG]], i32 0, i32 0
+// CHECK: %call = call x86_thiscallcc noundef i32 
+// CHECK-SAME: @"?__impl@@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %this, ptr noundef %[[V]])
+
+// CHECK: define internal noundef i32
+// CHECK-SAME: @"?__invoke@@?0??test@@YAXXZ@CA?A?@@UA@@@Z"
+// CHECK-SAME: (ptr inalloca(<{ %struct.A }>) %[[ARG:.*]])
+// CHECK: %unused.capture = alloca %class.anon, align 1
+// CHECK: %[[VAR:.*]] = getelementptr inbounds <{ %struct.A }>, ptr %[[ARG]], i32 0, i32 0
+// CHECK: %call = call x86_thiscallcc noundef i32
+// CHECK-SAME: @"?__impl@@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %unused.capture, ptr noundef %[[VAR]])
+// CHECK: ret i32 %call 
 
+// CHECK: define internal x86_thiscallcc noundef i32
+// CHECK-SAME: @"?__impl@@?0??test@@YAXXZ@QBE?A?@@UA@@@Z"
+// CHECK-SAME: (ptr noundef %this, ptr noundef %[[ARG:.*]])
+// CHECK: %this.addr = alloca ptr, align 4
+// CHECK: store ptr %this, ptr %this.addr, align 4
+// CHECK: %this1 = load ptr, ptr %this.addr, align 4
+// CHECK: %{{.*}} = load i32, ptr @"?calls@?1???R
+// CHECK: %inc = add nsw i32 %{{.*}}, 1
+// CHECK: store i32 %inc, ptr @"?calls@?1???R
+// CHECK: %{{.*}} = getelementptr inbounds %struct.A, ptr %{{.*}}, i32 0, i32 0
+// CHECK: %{{.*}} = load i32, ptr %{{.*}}, align 4
+// CHECK: %{{.*}} = load i32, ptr @"?calls@?1???R
+// CHECK: %add = add nsw i32 %{{.*}}, %{{.*}}
+// CHECK: ret i32 %add
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -1197,7 +1197,8 @@
 
   Class classify(QualType Ty) const;
   ABIArgInfo classifyReturnType(QualType RetTy, CCState ) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, CCState ) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy, CCState ,
+  bool isDelegateCall) const;
 
   /// Updates the number of available free registers, returns
   /// true if any registers were allocated.
@@ -1824,8 +1825,8 @@
   }
 }
 
-ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
-   CCState ) const {
+ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState ,
+   bool isDelegateCall) const {
   // FIXME: Set alignment on indirect arguments.
   bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
   bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
@@ -1838,7 +1839,7 @@
   const RecordType *RT = Ty->getAs();
   if (RT) {
 CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
-if (RAA == CGCXXABI::RAA_Indirect) {
+if (RAA == CGCXXABI::RAA_Indirect || isDelegateCall) {
   return getIndirectResult(Ty, false, State);
 } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
   // The field index doesn't matter, we'll fix it up later.
@@ -2027,7 +2028,8 @@
 if (State.IsPreassigned.test(I))
   continue;
 
-Args[I].info = classifyArgumentType(Args[I].type, State);
+Args[I].info =
+classifyArgumentType(Args[I].type, State, FI.isDelegateCall());
 UsedInAlloca |= (Args[I].info.getKind() == 

[PATCH] D137872: Implement lambdas with inalloca parameters by forwarding to function without inalloca calling convention.

2023-05-16 Thread Amy Huang via Phabricator via cfe-commits
akhuang added a comment.

In D137872#4327615 , @efriedma wrote:

> I'm having a bit of trouble following how exactly the thunk creation is 
> working here... do we generate different code depending on whether the call 
> operator and/or the static invoker are referenced?

So I think it used to be that the static invoker calls the call operator which 
contains the body of the lambda? And now both the static invoker and the call 
operator are delegating to this new __impl function. In 
EmitLambdaStaticInvokeBody it first calls EmitLambdaInAllocaCallOpFn to make 
the new call operator (which is populated using 
EmitLambdaDelegatingInvokeBody). And then inside EmitLambdaDelegatingInvokeBody 
it generates the new impl function and calls it in the body. Not sure if that 
answers the question, I agree the code is a bit roundabout.

> Why is the function in EmitLambdaInAllocaCallOpFn not getting defined using 
> the normal CodeGenModule machinery?

Do you mean why it's not using CodeGenModule machinery to generate the new call 
op body or why we're changing the original call op body?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D137872/new/

https://reviews.llvm.org/D137872

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


<    1   2   3   4   5