chill updated this revision to Diff 149252.
chill added a comment.

Update:

- made the similar changes needed for AArch64
- added and updated tests
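
For reference, the rule this patch models: under AAPCS, the alignment used
when passing a composite argument is its natural alignment, i.e. the maximum
of the members' alignments (an alignment attribute on a member counts; one on
the record itself does not), clamped to 4..8 bytes on 32-bit ARM and to 8..16
bytes on AArch64. A quick sketch, echoing the new aapcs64-align.cc test below
(the i128 constant is worked out by hand):

  // Attribute on the struct itself: natural alignment stays 8,
  // so S16 is still passed as [2 x i64].
  struct __attribute__((aligned(16))) S16 { long x, y; };

  // Attribute on a member: natural alignment becomes 16, so SF16 is
  // passed as a single i128; {6, 7} coerces to the constant
  // 7 * 2^64 + 6 = 129127208515966861318 seen in the CHECK lines.
  struct SF16 { long x __attribute__((aligned(16))); long y; };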


https://reviews.llvm.org/D46013

Files:
  include/clang/AST/ASTContext.h
  include/clang/AST/RecordLayout.h
  lib/AST/ASTContext.cpp
  lib/AST/RecordLayout.cpp
  lib/AST/RecordLayoutBuilder.cpp
  lib/CodeGen/TargetInfo.cpp
  test/CodeGen/aapcs-align.cc
  test/CodeGen/aapcs64-align.cc
  test/CodeGen/arm-arguments.c
  test/CodeGen/arm64-arguments.c

Index: test/CodeGen/arm64-arguments.c
===================================================================
--- test/CodeGen/arm64-arguments.c
+++ test/CodeGen/arm64-arguments.c
@@ -216,11 +216,11 @@
 
 typedef __attribute__((neon_vector_type(4))) int int32x4_t;
 int32x4_t f36(int i, s36_with_align s1, s36_with_align s2) {
-// CHECK: define <4 x i32> @f36(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
+// CHECK: define <4 x i32> @f36(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
 // CHECK: %s1 = alloca %struct.s36, align 16
 // CHECK: %s2 = alloca %struct.s36, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
+// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 16
+// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 16
 // CHECK: %[[a:.*]] = bitcast %struct.s36* %s1 to <4 x i32>*
 // CHECK: load <4 x i32>, <4 x i32>* %[[a]], align 16
 // CHECK: %[[b:.*]] = bitcast %struct.s36* %s2 to <4 x i32>*
@@ -325,11 +325,11 @@
 // passing aligned structs in registers
 __attribute__ ((noinline))
 int f39(int i, s39_with_align s1, s39_with_align s2) {
-// CHECK: define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
+// CHECK: define i32 @f39(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
 // CHECK: %s1 = alloca %struct.s39, align 16
 // CHECK: %s2 = alloca %struct.s39, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
+// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 16
+// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 16
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1
@@ -340,31 +340,31 @@
 s39_with_align g39_2;
 int caller39() {
 // CHECK: define i32 @caller39()
-// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
-// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
-// CHECK: call i32 @f39(i32 3, i128 %[[a]], i128 %[[b]])
+// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s39* @g39 to [2 x i64]*), align 16
+// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s39* @g39_2 to [2 x i64]*), align 16
+// CHECK: call i32 @f39(i32 3, [2 x i64] %[[a]], [2 x i64] %[[b]])
   return f39(3, g39, g39_2);
 }
 // passing aligned structs on stack
 __attribute__ ((noinline))
 int f39_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
               int i9, s39_with_align s1, s39_with_align s2) {
-// CHECK: define i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
+// CHECK: define i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
 // CHECK: %s1 = alloca %struct.s39, align 16
 // CHECK: %s2 = alloca %struct.s39, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
+// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 16
+// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 16
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 1
   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
 }
 int caller39_stack() {
 // CHECK: define i32 @caller39_stack()
-// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
-// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
-// CHECK: call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
+// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s39* @g39 to [2 x i64]*), align 16
+// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s39* @g39_2 to [2 x i64]*), align 16
+// CHECK: call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, [2 x i64] %[[a]], [2 x i64] %[[b]])
   return f39_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g39, g39_2);
 }
 
@@ -435,11 +435,11 @@
 // passing aligned structs in registers
 __attribute__ ((noinline))
 int f41(int i, s41_with_align s1, s41_with_align s2) {
-// CHECK: define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
+// CHECK: define i32 @f41(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
 // CHECK: %s1 = alloca %struct.s41, align 16
 // CHECK: %s2 = alloca %struct.s41, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
+// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 16
+// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 16
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1
@@ -450,31 +450,31 @@
 s41_with_align g41_2;
 int caller41() {
 // CHECK: define i32 @caller41()
-// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
-// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
-// CHECK: call i32 @f41(i32 3, i128 %[[a]], i128 %[[b]])
+// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s41* @g41 to [2 x i64]*), align 16
+// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s41* @g41_2 to [2 x i64]*), align 16
+// CHECK: call i32 @f41(i32 3, [2 x i64] %[[a]], [2 x i64] %[[b]])
   return f41(3, g41, g41_2);
 }
 // passing aligned structs on stack
 __attribute__ ((noinline))
 int f41_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
               int i9, s41_with_align s1, s41_with_align s2) {
-// CHECK: define i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
+// CHECK: define i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
 // CHECK: %s1 = alloca %struct.s41, align 16
 // CHECK: %s2 = alloca %struct.s41, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
+// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 16
+// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 16
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 1
   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
 }
 int caller41_stack() {
 // CHECK: define i32 @caller41_stack()
-// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
-// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
-// CHECK: call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
+// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s41* @g41 to [2 x i64]*), align 16
+// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s41* @g41_2 to [2 x i64]*), align 16
+// CHECK: call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, [2 x i64] %[[a]], [2 x i64] %[[b]])
   return f41_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g41, g41_2);
 }
 
@@ -609,12 +609,12 @@
 __attribute__ ((noinline))
 int f41_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
               s41_with_align s1, s41_with_align s2) {
-// CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i128 %s1.coerce, i128 %s2.coerce)
+// CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
 }
 int caller41_split() {
 // CHECK: define i32 @caller41_split()
-// CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 %{{.*}}, i128 %{{.*}})
+// CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [2 x i64] %{{.*}}, [2 x i64] %{{.*}})
   return f41_split(1, 2, 3, 4, 5, 6, 7, g41, g41_2);
 }
 
Index: test/CodeGen/arm-arguments.c
===================================================================
--- test/CodeGen/arm-arguments.c
+++ test/CodeGen/arm-arguments.c
@@ -211,10 +211,13 @@
 // APCS-GNU: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align {{[0-9]+}} %[[b]], i8* align {{[0-9]+}} %[[c]]
 // APCS-GNU: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
 // APCS-GNU: load <4 x float>, <4 x float>* %[[d]], align 16
-// AAPCS-LABEL: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval align 8, %struct.s35* byval align 8)
-// AAPCS: %[[a:.*]] = alloca %struct.s35, align 16
-// AAPCS: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
-// AAPCS: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
-// AAPCS: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %[[b]], i8* align 8 %[[c]]
-// AAPCS: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
-// AAPCS: load <4 x float>, <4 x float>* %[[d]], align 16
+
+// AAPCS-LABEL: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval align 4 %s1, %struct.s35* byval align 4 %s2)
+// AAPCS: %[[a_addr:.*]] = alloca <4 x float>, align 16
+// AAPCS: %[[b_addr:.*]] = alloca <4 x float>, align 16
+// AAPCS: %[[p1:.*]] = bitcast %struct.s35* %s1 to <4 x float>*
+// AAPCS: %[[a:.*]] = load <4 x float>, <4 x float>* %[[p1]], align 4
+// AAPCS: %[[p2:.*]] = bitcast %struct.s35* %s2 to <4 x float>*
+// AAPCS: %[[b:.*]] = load <4 x float>, <4 x float>* %[[p2]], align 4
+// AAPCS: store <4 x float> %[[a]], <4 x float>* %[[a_addr]], align 16
+// AAPCS: store <4 x float> %[[b]], <4 x float>* %[[b_addr]], align 16
Index: test/CodeGen/aapcs64-align.cc
===================================================================
--- /dev/null
+++ test/CodeGen/aapcs64-align.cc
@@ -0,0 +1,103 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-none-eabi \
+// RUN:   -O2 \
+// RUN:   -emit-llvm -o - %s | FileCheck %s
+
+extern "C" {
+
+// Base case, nothing interesting.
+struct S {
+  long x, y;
+};
+
+void f0(long, S);
+void f0m(long, long, long, long, long, S);
+void g0() {
+  S s = {6, 7};
+  f0(1, s);
+  f0m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g0
+// CHECK: call void @f0(i64 1, [2 x i64] [i64 6, i64 7]
+// CHECK: call void @f0m{{.*}}[2 x i64] [i64 6, i64 7]
+// CHECK: declare void @f0(i64, [2 x i64])
+// CHECK: declare void @f0m(i64, i64, i64, i64, i64, [2 x i64])
+
+// Aligned struct, passed according to its natural alignment.
+struct __attribute__((aligned(16))) S16 {
+  long x, y;
+} s16;
+
+void f1(long, S16);
+void f1m(long, long, long, long, long, S16);
+void g1() {
+  S16 s = {6, 7};
+  f1(1, s);
+  f1m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g1
+// CHECK: call void @f1{{.*}}[2 x i64] [i64 6, i64 7]
+// CHECK: call void @f1m{{.*}}[2 x i64] [i64 6, i64 7]
+// CHECK: declare void @f1(i64, [2 x i64])
+// CHECK: declare void @f1m(i64, i64, i64, i64, i64, [2 x i64])
+
+// Increased natural alignment.
+struct SF16 {
+  long x __attribute__((aligned(16)));
+  long y;
+};
+
+void f3(long, SF16);
+void f3m(long, long, long, long, long, SF16);
+void g3() {
+  SF16 s = {6, 7};
+  f3(1, s);
+  f3m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g3
+// CHECK: call void @f3(i64 1, i128 129127208515966861318)
+// CHECK: call void @f3m(i64 1, i64 2, i64 3, i64 4, i64 5, i128 129127208515966861318)
+// CHECK: declare void @f3(i64, i128)
+// CHECK: declare void @f3m(i64, i64, i64, i64, i64, i128)
+
+
+// Packed structure.
+struct __attribute__((packed)) P {
+  int x;
+  long u;
+};
+
+void f4(int, P);
+void f4m(int, int, int, int, int, P);
+void g4() {
+  P s = {6, 7};
+  f4(1, s);
+  f4m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g4()
+// CHECK: call void @f4(i32 1, [2 x i64] [i64 30064771078, i64 0])
+// CHECK: call void @f4m(i32 1, i32 2, i32 3, i32 4, i32 5, [2 x i64] [i64 30064771078, i64 0])
+// CHECK: declare void @f4(i32, [2 x i64])
+// CHECK: declare void @f4m(i32, i32, i32, i32, i32, [2 x i64])
+
+
+// Packed structure, overaligned, same as above.
+struct __attribute__((packed, aligned(16))) P16 {
+  int x;
+  long y;
+};
+
+void f5(int, P16);
+void f5m(int, int, int, int, int, P16);
+void g5() {
+  P16 s = {6, 7};
+  f5(1, s);
+  f5m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g5()
+// CHECK: call void @f5(i32 1, [2 x i64] [i64 30064771078, i64 0])
+// CHECK: call void @f5m(i32 1, i32 2, i32 3, i32 4, i32 5, [2 x i64] [i64 30064771078, i64 0])
+// CHECK: declare void @f5(i32, [2 x i64])
+// CHECK: declare void @f5m(i32, i32, i32, i32, i32, [2 x i64])
+
+}
Index: test/CodeGen/aapcs-align.cc
===================================================================
--- /dev/null
+++ test/CodeGen/aapcs-align.cc
@@ -0,0 +1,141 @@
+// REQUIRES: arm-registered-target
+// RUN: %clang_cc1 -triple arm-none-none-eabi \
+// RUN:   -O2 \
+// RUN:   -target-cpu cortex-a8 \
+// RUN:   -emit-llvm -o - %s | FileCheck %s
+
+extern "C" {
+
+// Base case, nothing interesting.
+struct S {
+  int x, y;
+};
+
+void f0(int, S);
+void f0m(int, int, int, int, int, S);
+void g0() {
+  S s = {6, 7};
+  f0(1, s);
+  f0m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g0
+// CHECK: call void @f0(i32 1, [2 x i32] [i32 6, i32 7]
+// CHECK: call void @f0m(i32 1, i32 2, i32 3, i32 4, i32 5, [2 x i32] [i32 6, i32 7]
+// CHECK: declare void @f0(i32, [2 x i32])
+// CHECK: declare void @f0m(i32, i32, i32, i32, i32, [2 x i32])
+
+// Aligned struct, passed according to its natural alignment.
+struct __attribute__((aligned(8))) S8 {
+  int x, y;
+} s8;
+
+void f1(int, S8);
+void f1m(int, int, int, int, int, S8);
+void g1() {
+  S8 s = {6, 7};
+  f1(1, s);
+  f1m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g1
+// CHECK: call void @f1(i32 1, [2 x i32] [i32 6, i32 7]
+// CHECK: call void @f1m(i32 1, i32 2, i32 3, i32 4, i32 5, [2 x i32] [i32 6, i32 7]
+// CHECK: declare void @f1(i32, [2 x i32])
+// CHECK: declare void @f1m(i32, i32, i32, i32, i32, [2 x i32])
+
+// Aligned struct, passed according to its natural alignment.
+struct alignas(16) S16 {
+  int x, y;
+};
+
+extern "C" void f2(int, S16);
+extern "C" void f2m(int, int, int, int, int, S16);
+
+void g2() {
+  S16 s = {6, 7};
+  f2(1, s);
+  f2m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g2
+// CHECK: call void @f2(i32 1, [4 x i32] [i32 6, i32 7
+// CHECK: call void @f2m(i32 1, i32 2, i32 3, i32 4, i32 5, [4 x i32] [i32 6, i32 7
+// CHECK: declare void @f2(i32, [4 x i32])
+// CHECK: declare void @f2m(i32, i32, i32, i32, i32, [4 x i32])
+
+// Increased natural alignment.
+struct SF8 {
+  int x __attribute__((aligned(8)));
+  int y;
+};
+
+void f3(int, SF8);
+void f3m(int, int, int, int, int, SF8);
+void g3() {
+  SF8 s = {6, 7};
+  f3(1, s);
+  f3m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g3
+// CHECK: call void @f3(i32 1, [1 x i64] [i64 30064771078]
+// CHECK: call void @f3m(i32 1, i32 2, i32 3, i32 4, i32 5, [1 x i64] [i64 30064771078]
+// CHECK: declare void @f3(i32, [1 x i64])
+// CHECK: declare void @f3m(i32, i32, i32, i32, i32, [1 x i64])
+
+// Increased natural alignment, but capped at 8 bytes.
+struct SF16 {
+  int x;
+  int y alignas(16);
+  int z, a, b, c, d, e, f, g, h, i, j, k;
+};
+
+void f4(int, SF16);
+void f4m(int, int, int, int, int, SF16);
+void g4() {
+  SF16 s = {6, 7};
+  f4(1, s);
+  f4m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g4
+// CHECK: call void @f4(i32 1, %struct.SF16* byval nonnull align 8
+// CHECK: call void @f4m(i32 1, i32 2, i32 3, i32 4, i32 5, %struct.SF16* byval nonnull align 8
+// CHECK: declare void @f4(i32, %struct.SF16* byval align 8)
+// CHECK: declare void @f4m(i32, i32, i32, i32, i32, %struct.SF16* byval align 8)
+
+// Packed structure.
+struct __attribute__((packed)) P {
+  int x;
+  long long u;
+};
+
+void f5(int, P);
+void f5m(int, int, int, int, int, P);
+void g5() {
+  P s = {6, 7};
+  f5(1, s);
+  f5m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g5
+// CHECK: call void @f5(i32 1, [3 x i32] [i32 6, i32 7, i32 0])
+// CHECK: call void @f5m(i32 1, i32 2, i32 3, i32 4, i32 5, [3 x i32] [i32 6, i32 7, i32 0])
+// CHECK: declare void @f5(i32, [3 x i32])
+// CHECK: declare void @f5m(i32, i32, i32, i32, i32, [3 x i32])
+
+
+// Packed and aligned; the alignment causes padding at the end.
+struct __attribute__((packed, aligned(8))) P8 {
+  int x;
+  long long u;
+};
+
+void f6(int, P8);
+void f6m(int, int, int, int, int, P8);
+void g6() {
+  P8 s = {6, 7};
+  f6(1, s);
+  f6m(1, 2, 3, 4, 5, s);
+}
+// CHECK: define void @g6
+// CHECK: call void @f6(i32 1, [4 x i32] [i32 6, i32 7, i32 0, i32 0])
+// CHECK: call void @f6m(i32 1, i32 2, i32 3, i32 4, i32 5, [4 x i32] [i32 6, i32 7, i32 0, i32 0])
+// CHECK: declare void @f6(i32, [4 x i32])
+// CHECK: declare void @f6m(i32, i32, i32, i32, i32, [4 x i32])
+}
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -5049,7 +5049,8 @@
     if (getTarget().isRenderScriptTarget()) {
       return coerceToIntArray(Ty, getContext(), getVMContext());
     }
-    unsigned Alignment = getContext().getTypeAlign(Ty);
+    unsigned Alignment = getContext().getTypeNaturalAlign(Ty);
+    Alignment = std::min(std::max(Alignment, 64u), 128u);
     Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
 
     // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
@@ -5786,15 +5787,18 @@
   // The ABI alignment for APCS is 4-byte and for AAPCS at least 4-byte and at
   // most 8-byte. We realign the indirect argument if type alignment is bigger
   // than ABI alignment.
-  uint64_t ABIAlign = 4;
-  uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8;
-  if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
-       getABIKind() == ARMABIInfo::AAPCS)
-    ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8);
-
+  uint64_t ABIAlign = 32;
+  uint64_t TyAlign;
+  if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
+      getABIKind() == ARMABIInfo::AAPCS) {
+    TyAlign = getContext().getTypeNaturalAlign(Ty);
+    ABIAlign = std::min(std::max(TyAlign, (uint64_t)32), (uint64_t)64);
+  } else {
+    TyAlign = getContext().getTypeAlign(Ty);
+  }
   if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) {
     assert(getABIKind() != ARMABIInfo::AAPCS16_VFP && "unexpected byval");
-    return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign),
+    return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign / 8),
                                    /*ByVal=*/true,
                                    /*Realign=*/TyAlign > ABIAlign);
   }
@@ -5810,7 +5814,7 @@
   unsigned SizeRegs;
   // FIXME: Try to match the types of the arguments more accurately where
   // we can.
-  if (getContext().getTypeAlign(Ty) <= 32) {
+  if (TyAlign <= 32) {
     ElemTy = llvm::Type::getInt32Ty(getVMContext());
     SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32;
   } else {
Index: lib/AST/RecordLayoutBuilder.cpp
===================================================================
--- lib/AST/RecordLayoutBuilder.cpp
+++ lib/AST/RecordLayoutBuilder.cpp
@@ -582,6 +582,9 @@
   /// The alignment if attribute packed is not used.
   CharUnits UnpackedAlignment;
 
+  /// \brief The maximum of the alignments of top-level members.
+  CharUnits NaturalAlignment;
+
   SmallVector<uint64_t, 16> FieldOffsets;
 
   /// Whether the external AST source has provided a layout for this
@@ -662,6 +665,7 @@
                              EmptySubobjectMap *EmptySubobjects)
       : Context(Context), EmptySubobjects(EmptySubobjects), Size(0),
         Alignment(CharUnits::One()), UnpackedAlignment(CharUnits::One()),
+        NaturalAlignment(CharUnits::One()),
         UseExternalLayout(false), InferAlignment(false), Packed(false),
         IsUnion(false), IsMac68kAlign(false), IsMsStruct(false),
         UnfilledBitsInLastUnit(0), LastBitfieldTypeSize(0),
@@ -1707,7 +1711,9 @@
   setSize(std::max(getSizeInBits(), getDataSizeInBits()));
 
   // Remember max struct/class alignment.
-  UpdateAlignment(Context.toCharUnitsFromBits(FieldAlign), 
+  NaturalAlignment =
+      std::max(NaturalAlignment, Context.toCharUnitsFromBits(FieldAlign));
+  UpdateAlignment(Context.toCharUnitsFromBits(FieldAlign),
                   Context.toCharUnitsFromBits(UnpackedFieldAlign));
 }
 
@@ -1862,6 +1868,7 @@
   setSize(std::max(getSizeInBits(), getDataSizeInBits()));
 
   // Remember max struct/class alignment.
+  NaturalAlignment = std::max(NaturalAlignment, FieldAlign);
   UpdateAlignment(FieldAlign, UnpackedFieldAlign);
 }
 
@@ -2980,7 +2987,8 @@
     if (const auto *RD = dyn_cast<CXXRecordDecl>(D)) {
       Builder.cxxLayout(RD);
       NewEntry = new (*this) ASTRecordLayout(
-          *this, Builder.Size, Builder.Alignment, Builder.RequiredAlignment,
+          *this, Builder.Size, Builder.Alignment, Builder.Alignment,
+          Builder.RequiredAlignment,
           Builder.HasOwnVFPtr, Builder.HasOwnVFPtr || Builder.PrimaryBase,
           Builder.VBPtrOffset, Builder.DataSize, Builder.FieldOffsets,
           Builder.NonVirtualSize, Builder.Alignment, CharUnits::Zero(),
@@ -2990,7 +2998,8 @@
     } else {
       Builder.layout(D);
       NewEntry = new (*this) ASTRecordLayout(
-          *this, Builder.Size, Builder.Alignment, Builder.RequiredAlignment,
+          *this, Builder.Size, Builder.Alignment, Builder.Alignment,
+          Builder.RequiredAlignment,
           Builder.Size, Builder.FieldOffsets);
     }
   } else {
@@ -3011,7 +3020,7 @@
       CharUnits NonVirtualSize =
           skipTailPadding ? DataSize : Builder.NonVirtualSize;
       NewEntry = new (*this) ASTRecordLayout(
-          *this, Builder.getSize(), Builder.Alignment,
+          *this, Builder.getSize(), Builder.Alignment, Builder.NaturalAlignment,
           /*RequiredAlignment : used by MS-ABI)*/
           Builder.Alignment, Builder.HasOwnVFPtr, RD->isDynamicClass(),
           CharUnits::fromQuantity(-1), DataSize, Builder.FieldOffsets,
@@ -3024,7 +3033,7 @@
       Builder.Layout(D);
 
       NewEntry = new (*this) ASTRecordLayout(
-          *this, Builder.getSize(), Builder.Alignment,
+          *this, Builder.getSize(), Builder.Alignment, Builder.NaturalAlignment,
           /*RequiredAlignment : used by MS-ABI)*/
           Builder.Alignment, Builder.getSize(), Builder.FieldOffsets);
     }
@@ -3178,6 +3187,7 @@
   const ASTRecordLayout *NewEntry =
     new (*this) ASTRecordLayout(*this, Builder.getSize(),
                                 Builder.Alignment,
+                                Builder.NaturalAlignment,
                                 /*RequiredAlignment : used by MS-ABI)*/
                                 Builder.Alignment,
                                 Builder.getDataSize(),
Index: lib/AST/RecordLayout.cpp
===================================================================
--- lib/AST/RecordLayout.cpp
+++ lib/AST/RecordLayout.cpp
@@ -30,17 +30,20 @@
 
 ASTRecordLayout::ASTRecordLayout(const ASTContext &Ctx, CharUnits size,
                                  CharUnits alignment,
+                                 CharUnits naturalAlignment,
                                  CharUnits requiredAlignment,
                                  CharUnits datasize,
                                  ArrayRef<uint64_t> fieldoffsets)
     : Size(size), DataSize(datasize), Alignment(alignment),
+      NaturalAlignment(naturalAlignment),
       RequiredAlignment(requiredAlignment) {
   FieldOffsets.append(Ctx, fieldoffsets.begin(), fieldoffsets.end());
 }
 
 // Constructor for C++ records.
 ASTRecordLayout::ASTRecordLayout(const ASTContext &Ctx,
                                  CharUnits size, CharUnits alignment,
+                                 CharUnits naturalAlignment,
                                  CharUnits requiredAlignment,
                                  bool hasOwnVFPtr, bool hasExtendableVFPtr,
                                  CharUnits vbptroffset,
@@ -57,6 +60,7 @@
                                  const BaseOffsetsMapTy& BaseOffsets,
                                  const VBaseOffsetsMapTy& VBaseOffsets)
   : Size(size), DataSize(datasize), Alignment(alignment),
+    NaturalAlignment(naturalAlignment),
     RequiredAlignment(requiredAlignment), CXXInfo(new (Ctx) CXXRecordLayoutInfo)
 {
   FieldOffsets.append(Ctx, fieldoffsets.begin(), fieldoffsets.end());
Index: lib/AST/ASTContext.cpp
===================================================================
--- lib/AST/ASTContext.cpp
+++ lib/AST/ASTContext.cpp
@@ -1667,6 +1667,7 @@
 TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
   uint64_t Width = 0;
   unsigned Align = 8;
+  unsigned NaturalAlign = 0;
   bool AlignIsRequired = false;
   unsigned AS = 0;
   switch (T->getTypeClass()) {
@@ -1878,6 +1879,7 @@
     const ASTRecordLayout &Layout = getASTObjCInterfaceLayout(ObjCI->getDecl());
     Width = toBits(Layout.getSize());
     Align = toBits(Layout.getAlignment());
+    NaturalAlign = toBits(Layout.getNaturalAlignment());
     break;
   }
   case Type::Record:
@@ -1896,6 +1898,7 @@
           getTypeInfo(ED->getIntegerType()->getUnqualifiedDesugaredType());
       if (unsigned AttrAlign = ED->getMaxAlignment()) {
         Info.Align = AttrAlign;
+        Info.NaturalAlign = Info.Align;
         Info.AlignIsRequired = true;
       }
       return Info;
@@ -1906,6 +1909,7 @@
     const ASTRecordLayout &Layout = getASTRecordLayout(RD);
     Width = toBits(Layout.getSize());
     Align = toBits(Layout.getAlignment());
+    NaturalAlign = toBits(Layout.getNaturalAlignment());
     AlignIsRequired = RD->hasAttr<AlignedAttr>();
     break;
   }
@@ -1942,6 +1946,7 @@
       AlignIsRequired = Info.AlignIsRequired;
     }
     Width = Info.Width;
+    NaturalAlign = Info.NaturalAlign;
     break;
   }
 
@@ -1985,7 +1990,9 @@
   }
 
   assert(llvm::isPowerOf2_32(Align) && "Alignment must be power of 2");
-  return TypeInfo(Width, Align, AlignIsRequired);
+  if (NaturalAlign == 0)
+    NaturalAlign = Align;
+  return TypeInfo(Width, Align, NaturalAlign, AlignIsRequired);
 }
 
 unsigned ASTContext::getOpenMPDefaultSimdAlign(QualType T) const {
Index: include/clang/AST/RecordLayout.h
===================================================================
--- include/clang/AST/RecordLayout.h
+++ include/clang/AST/RecordLayout.h
@@ -71,6 +71,10 @@
   // Alignment - Alignment of record in characters.
   CharUnits Alignment;
 
+  // NaturalAlignment - Maximum of the alignments of the record members in
+  // characters.
+  CharUnits NaturalAlignment;
+
   /// RequiredAlignment - The required alignment of the object.  In the MS-ABI
   /// the __declspec(align()) trumps #pramga pack and must always be obeyed.
   CharUnits RequiredAlignment;
@@ -136,14 +140,16 @@
   CXXRecordLayoutInfo *CXXInfo = nullptr;
 
   ASTRecordLayout(const ASTContext &Ctx, CharUnits size, CharUnits alignment,
+                  CharUnits naturalAlignment,
                   CharUnits requiredAlignment, CharUnits datasize,
                   ArrayRef<uint64_t> fieldoffsets);
 
   using BaseOffsetsMapTy = CXXRecordLayoutInfo::BaseOffsetsMapTy;
 
   // Constructor for C++ records.
   ASTRecordLayout(const ASTContext &Ctx,
                   CharUnits size, CharUnits alignment,
+                  CharUnits naturalAlignment,
                   CharUnits requiredAlignment,
                   bool hasOwnVFPtr, bool hasExtendableVFPtr,
                   CharUnits vbptroffset,
@@ -170,6 +176,9 @@
   /// getAlignment - Get the record alignment in characters.
   CharUnits getAlignment() const { return Alignment; }
 
+  /// getNaturalAlignment - Get the natural record alignment in characters.
+  CharUnits getNaturalAlignment() const { return NaturalAlignment; }
+
   /// getSize - Get the record size in characters.
   CharUnits getSize() const { return Size; }
 
Index: include/clang/AST/ASTContext.h
===================================================================
--- include/clang/AST/ASTContext.h
+++ include/clang/AST/ASTContext.h
@@ -138,11 +138,14 @@
 struct TypeInfo {
   uint64_t Width = 0;
   unsigned Align = 0;
+  unsigned NaturalAlign = 0;
   bool AlignIsRequired : 1;
 
   TypeInfo() : AlignIsRequired(false) {}
-  TypeInfo(uint64_t Width, unsigned Align, bool AlignIsRequired)
-      : Width(Width), Align(Align), AlignIsRequired(AlignIsRequired) {}
+  TypeInfo(uint64_t Width, unsigned Align, unsigned NaturalAlign,
+           bool AlignIsRequired)
+      : Width(Width), Align(Align), NaturalAlign(NaturalAlign),
+        AlignIsRequired(AlignIsRequired) {}
 };
 
 /// Holds long-lived AST nodes (such as types and decls) that can be
@@ -2047,6 +2050,15 @@
   unsigned getTypeAlign(QualType T) const { return getTypeInfo(T).Align; }
   unsigned getTypeAlign(const Type *T) const { return getTypeInfo(T).Align; }
 
+  /// Return the ABI-specified natural alignment of a (complete) type \p T,
+  /// before alignment adjustments, in bits.
+  unsigned getTypeNaturalAlign(QualType T) const {
+    return getTypeInfo(T).NaturalAlign;
+  }
+  unsigned getTypeNaturalAlign(const Type *T) const {
+    return getTypeInfo(T).NaturalAlign;
+  }
+
   /// Return the ABI-specified alignment of a type, in bits, or 0 if
   /// the type is incomplete and we cannot determine the alignment (for
   /// example, from alignment attributes).
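
A minimal sketch of how target ABI code consumes the new hook, mirroring the
ARM change in TargetInfo.cpp above (Ctx here stands for an ASTContext
reference; values are in bits, so the 32..64 clamp is the AAPCS 4..8 byte
range):

  uint64_t TyAlign = Ctx.getTypeNaturalAlign(Ty); // max member alignment, bits
  uint64_t ABIAlign = std::min(std::max(TyAlign, (uint64_t)32), (uint64_t)64);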