[clang] [AArch64] Add soft-float ABI (PR #74460)

via cfe-commits Tue, 05 Dec 2023 04:30:21 -0800

https://github.com/ostannard created 
https://github.com/llvm/llvm-project/pull/74460


This adds support for the AArch64 soft-float ABI. The specification for this 
ABI is currently in review at https://github.com/ARM-software/abi-aa/pull/232, 
and I won't commit this until that PR is merged.

Because all existing AArch64 hardware has floating-point hardware, we expect 
this to be a niche option, only used for embedded systems on R-profile systems. 
We are going to document that SysV-like systems should only ever use the base 
(hard-float) PCS variant: https://github.com/ARM-software/abi-aa/pull/233. For 
that reason, I've not added an option to select the ABI independently of the 
FPU hardware; instead, the new ABI is enabled if and only if the target 
architecture does not have an FPU.

For testing, I have run this through an ABI fuzzer, but since this is the first 
implementation it can only test for internal consistency (callers and callees 
agree on the PCS), not for conformance to the ABI spec.

>From 316854b6558811aaa03b9f96be1849e0426f8aac Mon Sep 17 00:00:00 2001
From: Oliver Stannard <[email protected]>
Date: Fri, 1 Dec 2023 10:06:57 +0000
Subject: [PATCH 1/4] [AArch64] Split feature tests for FP and SIMD

AArch64TargetInfo defaults to having the FP feature enabled, but this
function was ignoring that and checking for SIMD instructions instead.

This won't affect most users, because the driver explicitly enables or
disables fp-armv8, which gets handled by
AArch64TargetInfo::handleTargetFeatures to turn FP and SIMD on or off.
However, it will make testing future patches easier, and allow testing
for the presence of FP registers/instructions in CC1 tests.

Change-Id: I2d2b3569dca5fa1dc40c5c6d1dabf7741b8c480e
---
 clang/lib/Basic/Targets/AArch64.cpp           |   3 +-
 .../test/CodeGen/attr-target-clones-aarch64.c | 126 +++++++++++++++---
 2 files changed, 107 insertions(+), 22 deletions(-)

diff --git a/clang/lib/Basic/Targets/AArch64.cpp 
b/clang/lib/Basic/Targets/AArch64.cpp
index c31f2e0bee543..23090dad83ad7 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -649,7 +649,8 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const 
{
   return llvm::StringSwitch<bool>(Feature)
       .Cases("aarch64", "arm64", "arm", true)
       .Case("fmv", HasFMV)
-      .Cases("neon", "fp", "simd", FPU & NeonMode)
+      .Case("fp", FPU & FPUMode)
+      .Cases("neon", "simd", FPU & NeonMode)
       .Case("jscvt", HasJSCVT)
       .Case("fcma", HasFCMA)
       .Case("rng", HasRandGen)
diff --git a/clang/test/CodeGen/attr-target-clones-aarch64.c 
b/clang/test/CodeGen/attr-target-clones-aarch64.c
index 3f2f2fdd24e8a..4404dd0da8e5e 100644
--- a/clang/test/CodeGen/attr-target-clones-aarch64.c
+++ b/clang/test/CodeGen/attr-target-clones-aarch64.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --check-attributes --check-globals --include-generated-funcs
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -emit-llvm -o - %s | 
FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fmv -S 
-emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-NOFMV
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fp-armv8 -S 
-emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fp-armv8 
-target-feature -fmv -S -emit-llvm -o - %s | FileCheck %s 
-check-prefix=CHECK-NOFMV
 
 int __attribute__((target_clones("lse+aes", "sve2"))) ftc(void) { return 0; }
 int __attribute__((target_clones("sha2", "sha2+memtag2", " default "))) 
ftc_def(void) { return 1; }
@@ -22,6 +22,8 @@ int __attribute__((target_clones("default"))) main() {
 inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", 
"default"))) ftc_inline2(void) { return 2; };
 
 
+
+//.
 // CHECK: @__aarch64_cpu_features = external dso_local global { i64 }
 // CHECK: @ftc.ifunc = weak_odr ifunc i32 (), ptr @ftc.resolver
 // CHECK: @ftc_def.ifunc = weak_odr ifunc i32 (), ptr @ftc_def.resolver
@@ -30,19 +32,25 @@ inline int __attribute__((target_clones("fp16", 
"sve2-bitperm+fcma", "default"))
 // CHECK: @ftc_inline1.ifunc = weak_odr ifunc i32 (), ptr @ftc_inline1.resolver
 // CHECK: @ftc_inline2.ifunc = weak_odr ifunc i32 (), ptr @ftc_inline2.resolver
 // CHECK: @ftc_inline3.ifunc = weak_odr ifunc i32 (), ptr @ftc_inline3.resolver
-
+//.
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc._MlseMaes(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 0
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc._Msve2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 0
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 0
+//
+//
 // CHECK-LABEL: @ftc.resolver(
 // CHECK-NEXT:  resolver_entry:
 // CHECK-NEXT:    call void @__init_cpu_features_resolver()
@@ -63,18 +71,26 @@ inline int __attribute__((target_clones("fp16", 
"sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    ret ptr @ftc._Msve2
 // CHECK:       resolver_else2:
 // CHECK-NEXT:    ret ptr @ftc
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_def._Msha2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 1
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_def._Msha2Mmemtag2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 1
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_def(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 1
+//
+//
 // CHECK-LABEL: @ftc_def.resolver(
 // CHECK-NEXT:  resolver_entry:
 // CHECK-NEXT:    call void @__init_cpu_features_resolver()
@@ -95,14 +111,20 @@ inline int __attribute__((target_clones("fp16", 
"sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    ret ptr @ftc_def._Msha2
 // CHECK:       resolver_else2:
 // CHECK-NEXT:    ret ptr @ftc_def
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_dup1._Msha2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 2
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_dup1(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 2
+//
+//
 // CHECK-LABEL: @ftc_dup1.resolver(
 // CHECK-NEXT:  resolver_entry:
 // CHECK-NEXT:    call void @__init_cpu_features_resolver()
@@ -115,18 +137,26 @@ inline int __attribute__((target_clones("fp16", 
"sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    ret ptr @ftc_dup1._Msha2
 // CHECK:       resolver_else:
 // CHECK-NEXT:    ret ptr @ftc_dup1
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_dup2._Mfp(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 3
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_dup2._MdotprodMcrc(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 3
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_dup2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 3
+//
+//
 // CHECK-LABEL: @ftc_dup2.resolver(
 // CHECK-NEXT:  resolver_entry:
 // CHECK-NEXT:    call void @__init_cpu_features_resolver()
@@ -147,6 +177,8 @@ inline int __attribute__((target_clones("fp16", 
"sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    ret ptr @ftc_dup2._Mfp
 // CHECK:       resolver_else2:
 // CHECK-NEXT:    ret ptr @ftc_dup2
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @foo(
 // CHECK-NEXT:  entry:
@@ -158,10 +190,14 @@ inline int __attribute__((target_clones("fp16", 
"sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    [[CALL4:%.*]] = call i32 @ftc_dup2.ifunc()
 // CHECK-NEXT:    [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]]
 // CHECK-NEXT:    ret i32 [[ADD5]]
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_direct(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 4
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @main(
 // CHECK-NEXT:  entry:
@@ -175,6 +211,8 @@ inline int __attribute__((target_clones("fp16", 
"sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    [[CALL4:%.*]] = call i32 @ftc_direct()
 // CHECK-NEXT:    [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]]
 // CHECK-NEXT:    ret i32 [[ADD5]]
+//
+//
 // CHECK-LABEL: @ftc_inline1.resolver(
 // CHECK-NEXT:  resolver_entry:
 // CHECK-NEXT:    call void @__init_cpu_features_resolver()
@@ -203,6 +241,8 @@ inline int __attribute__((target_clones("fp16", 
"sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    ret ptr @ftc_inline1._MrngMsimd
 // CHECK:       resolver_else4:
 // CHECK-NEXT:    ret ptr @ftc_inline1
+//
+//
 // CHECK-LABEL: @ftc_inline2.resolver(
 // CHECK-NEXT:  resolver_entry:
 // CHECK-NEXT:    call void @__init_cpu_features_resolver()
@@ -223,6 +263,8 @@ inline int __attribute__((target_clones("fp16", 
"sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    ret ptr @ftc_inline2._Mfp16
 // CHECK:       resolver_else2:
 // CHECK-NEXT:    ret ptr @ftc_inline2
+//
+//
 // CHECK-LABEL: @ftc_inline3.resolver(
 // CHECK-NEXT:  resolver_entry:
 // CHECK-NEXT:    call void @__init_cpu_features_resolver()
@@ -243,62 +285,92 @@ inline int __attribute__((target_clones("fp16", 
"sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    ret ptr @ftc_inline3._Mbti
 // CHECK:       resolver_else2:
 // CHECK-NEXT:    ret ptr @ftc_inline3
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline1._MrngMsimd(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 1
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline1._MrcpcMpredres(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 1
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline1._Msve2-aesMwfxt(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 1
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline1(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 1
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline2._Mfp16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 2
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline2._MfcmaMsve2-bitperm(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 2
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 2
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline3._Mbti(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 3
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline3._MsveMsb(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 3
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline3(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 3
+//
+//
 // CHECK-NOFMV: Function Attrs: noinline nounwind optnone
 // CHECK-NOFMV-LABEL: @ftc(
 // CHECK-NOFMV-NEXT:  entry:
 // CHECK-NOFMV-NEXT:    ret i32 0
+//
+//
 // CHECK-NOFMV: Function Attrs: noinline nounwind optnone
 // CHECK-NOFMV-LABEL: @ftc_def(
 // CHECK-NOFMV-NEXT:  entry:
 // CHECK-NOFMV-NEXT:    ret i32 1
+//
+//
 // CHECK-NOFMV: Function Attrs: noinline nounwind optnone
 // CHECK-NOFMV-LABEL: @ftc_dup1(
 // CHECK-NOFMV-NEXT:  entry:
 // CHECK-NOFMV-NEXT:    ret i32 2
+//
+//
 // CHECK-NOFMV: Function Attrs: noinline nounwind optnone
 // CHECK-NOFMV-LABEL: @ftc_dup2(
 // CHECK-NOFMV-NEXT:  entry:
 // CHECK-NOFMV-NEXT:    ret i32 3
+//
+//
 // CHECK-NOFMV: Function Attrs: noinline nounwind optnone
 // CHECK-NOFMV-LABEL: @foo(
 // CHECK-NOFMV-NEXT:  entry:
@@ -310,10 +382,14 @@ inline int __attribute__((target_clones("fp16", 
"sve2-bitperm+fcma", "default"))
 // CHECK-NOFMV-NEXT:    [[CALL4:%.*]] = call i32 @ftc_dup2()
 // CHECK-NOFMV-NEXT:    [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]]
 // CHECK-NOFMV-NEXT:    ret i32 [[ADD5]]
+//
+//
 // CHECK-NOFMV: Function Attrs: noinline nounwind optnone
 // CHECK-NOFMV-LABEL: @ftc_direct(
 // CHECK-NOFMV-NEXT:  entry:
 // CHECK-NOFMV-NEXT:    ret i32 4
+//
+//
 // CHECK-NOFMV: Function Attrs: noinline nounwind optnone
 // CHECK-NOFMV-LABEL: @main(
 // CHECK-NOFMV-NEXT:  entry:
@@ -327,21 +403,29 @@ inline int __attribute__((target_clones("fp16", 
"sve2-bitperm+fcma", "default"))
 // CHECK-NOFMV-NEXT:    [[CALL4:%.*]] = call i32 @ftc_direct()
 // CHECK-NOFMV-NEXT:    [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]]
 // CHECK-NOFMV-NEXT:    ret i32 [[ADD5]]
-
-// CHECK: attributes #0 = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+fp-armv8,+lse,+neon" }
-// CHECK: attributes #1 = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2" }
-// CHECK: attributes #2 = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// CHECK: attributes #3 = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+fp-armv8,+neon,+sha2" }
-// CHECK: attributes #4 = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+fp-armv8,+mte,+neon,+sha2" }
-// CHECK: attributes #5 = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+fp-armv8,+neon" }
-// CHECK: attributes #6 = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+crc,+dotprod,+fp-armv8,+neon" }
-// CHECK: attributes #7 = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+fp-armv8,+neon,+rand" }
-// CHECK: attributes #8 = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+predres,+rcpc" }
-// CHECK: attributes #9 = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-aes,+wfxt" }
-// CHECK: attributes #10 = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+fp-armv8,+fullfp16,+neon" }
-// CHECK: attributes #11 = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+complxnum,+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-bitperm"
 }
-// CHECK: attributes #12 = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+bti" }
-// CHECK: attributes #13 = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+fp-armv8,+fullfp16,+neon,+sb,+sve" }
-
-// CHECK-NOFMV: attributes #0 = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="-fmv" }
-// CHECK-NOFMV: attributes #1 = { "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-features"="-fmv" }
+//
+//.
+// CHECK: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+lse,+neon,-fp-armv8" }
+// CHECK: attributes #[[ATTR1:[0-9]+]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+fullfp16,+neon,+sve,+sve2,-fp-armv8" }
+// CHECK: attributes #[[ATTR2:[0-9]+]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="-fp-armv8" }
+// CHECK: attributes #[[ATTR3:[0-9]+]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+neon,+sha2,-fp-armv8" }
+// CHECK: attributes #[[ATTR4:[0-9]+]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+mte,+neon,+sha2,-fp-armv8" }
+// CHECK: attributes #[[ATTR5:[0-9]+]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+neon,-fp-armv8" }
+// CHECK: attributes #[[ATTR6:[0-9]+]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+crc,+dotprod,+neon,-fp-armv8" }
+// CHECK: attributes #[[ATTR7:[0-9]+]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+neon,+rand,-fp-armv8" }
+// CHECK: attributes #[[ATTR8:[0-9]+]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+predres,+rcpc,-fp-armv8" }
+// CHECK: attributes #[[ATTR9:[0-9]+]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+fullfp16,+neon,+sve,+sve2,+sve2-aes,+wfxt,-fp-armv8" }
+// CHECK: attributes #[[ATTR10:[0-9]+]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+fullfp16,+neon,-fp-armv8" }
+// CHECK: attributes #[[ATTR11:[0-9]+]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+complxnum,+fullfp16,+neon,+sve,+sve2,+sve2-bitperm,-fp-armv8"
 }
+// CHECK: attributes #[[ATTR12:[0-9]+]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+bti,-fp-armv8" }
+// CHECK: attributes #[[ATTR13:[0-9]+]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+fullfp16,+neon,+sb,+sve,-fp-armv8" }
+//.
+// CHECK-NOFMV: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="-fmv,-fp-armv8" }
+// CHECK-NOFMV: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-features"="-fmv,-fp-armv8" }
+//.
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
+// CHECK-NOFMV: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK-NOFMV: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.

>From e541ff2c5fc1aaf9c924a2d39054c4eed46ca98d Mon Sep 17 00:00:00 2001
From: Oliver Stannard <[email protected]>
Date: Thu, 23 Nov 2023 11:29:28 +0000
Subject: [PATCH 2/4] [AArch64] Add a soft-float ABI

This patch just adds the ABI enum and tests existing behaviour, it
doesn't make any functional changes yet.

I've not added an option to turn this on, instead it is automatically
selected if the target does not have FP registers. This is because the
vast majority of existing AArch64 hardware does have an FPU, so we
expect this to only be used when absolutely necessary, unlike AArch32
where both ABIs were commonly used.

Change-Id: I6f3a4562ca60c31b49a926cdce1881418677c604
---
 clang/lib/CodeGen/CodeGenModule.cpp         |  2 ++
 clang/lib/CodeGen/TargetInfo.h              |  1 +
 clang/test/CodeGen/aarch64-soft-float-abi.c | 15 +++++++++++++++
 3 files changed, 18 insertions(+)
 create mode 100644 clang/test/CodeGen/aarch64-soft-float-abi.c

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index dea58a7ff4146..2e730fdb0b83f 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -143,6 +143,8 @@ createTargetCodeGenInfo(CodeGenModule &CGM) {
       Kind = AArch64ABIKind::DarwinPCS;
     else if (Triple.isOSWindows())
       return createWindowsAArch64TargetCodeGenInfo(CGM, AArch64ABIKind::Win64);
+    else if (!Target.hasFeature("fp"))
+      Kind = AArch64ABIKind::AAPCSSoft;
 
     return createAArch64TargetCodeGenInfo(CGM, Kind);
   }
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 0c0781a2d5ab9..0b69d92b70cee 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -416,6 +416,7 @@ enum class AArch64ABIKind {
   AAPCS = 0,
   DarwinPCS,
   Win64,
+  AAPCSSoft,
 };
 
 std::unique_ptr<TargetCodeGenInfo>
diff --git a/clang/test/CodeGen/aarch64-soft-float-abi.c 
b/clang/test/CodeGen/aarch64-soft-float-abi.c
new file mode 100644
index 0000000000000..2ff9de8d34bd1
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-soft-float-abi.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -triple aarch64 -target-feature +fp-armv8 -emit-llvm -o - 
%s | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature -fp-armv8 -emit-llvm -o - 
%s | FileCheck %s --check-prefixes=CHECK
+
+// Floats are passed in integer registers, this will be handled by the backend.
+// CHECK: define dso_local half @test0(half noundef %a)
+// CHECK: define dso_local bfloat @test1(bfloat noundef %a)
+// CHECK: define dso_local float @test2(float noundef %a)
+// CHECK: define dso_local double @test3(double noundef %a)
+// CHECK: define dso_local fp128 @test4(fp128 noundef %a)
+__fp16 test0(__fp16 a) { return a; }
+__bf16 test1(__bf16 a) { return a; }
+float test2(float a) { return a; }
+double test3(double a) { return a; }
+long double test4(long double a) { return a; }
+

>From 8891380d65f168eb66e92bd453448b83a7865aef Mon Sep 17 00:00:00 2001
From: Oliver Stannard <[email protected]>
Date: Thu, 23 Nov 2023 11:31:52 +0000
Subject: [PATCH 3/4] [AArch64] Disable HFAs/HVAs for soft-float

If we can't pass them in FP registers, then homogeneous floating-point
and vector aggregates should be treated like any other composite type,
and passed either in registers or on the stack.

Change-Id: Icd56e122ad586462d6059069f923ffca4b32a8d2
---
 clang/lib/CodeGen/Targets/AArch64.cpp       |  5 +++++
 clang/test/CodeGen/aarch64-soft-float-abi.c | 24 +++++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp 
b/clang/lib/CodeGen/Targets/AArch64.cpp
index be5145daa00b7..e290e8761a5f4 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -478,6 +478,11 @@ bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits 
VectorSize,
 }
 
 bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+  // For the soft-float ABI variant, no types are considered to be homogeneous
+  // aggregates.
+  if (Kind == AArch64ABIKind::AAPCSSoft)
+    return false;
+
   // Homogeneous aggregates for AAPCS64 must have base types of a floating
   // point type or a short-vector type. This is the same as the 32-bit ABI,
   // but with the difference that any floating-point type is allowed,
diff --git a/clang/test/CodeGen/aarch64-soft-float-abi.c 
b/clang/test/CodeGen/aarch64-soft-float-abi.c
index 2ff9de8d34bd1..0d80e82e6aef3 100644
--- a/clang/test/CodeGen/aarch64-soft-float-abi.c
+++ b/clang/test/CodeGen/aarch64-soft-float-abi.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -triple aarch64 -target-feature +fp-armv8 -emit-llvm -o - 
%s | FileCheck %s --check-prefixes=CHECK
-// RUN: %clang_cc1 -triple aarch64 -target-feature -fp-armv8 -emit-llvm -o - 
%s | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +fp-armv8 -emit-llvm -o - 
%s | FileCheck %s --check-prefixes=CHECK,HARD
+// RUN: %clang_cc1 -triple aarch64 -target-feature -fp-armv8 -emit-llvm -o - 
%s | FileCheck %s --check-prefixes=CHECK,SOFT
 
 // Floats are passed in integer registers, this will be handled by the backend.
 // CHECK: define dso_local half @test0(half noundef %a)
@@ -13,3 +13,23 @@ float test2(float a) { return a; }
 double test3(double a) { return a; }
 long double test4(long double a) { return a; }
 
+// No types are considered to be HFAs or HVAs by the soft-float PCS, so these
+// are converted to integer types.
+struct A {
+  float x;
+};
+// SOFT: define dso_local i32 @test10(i64 %a.coerce)
+// HARD: define dso_local %struct.A @test10([1 x float] alignstack(8) 
%a.coerce)
+struct A test10(struct A a) { return a; }
+
+struct B {
+  double x;
+  double y;
+};
+// SOFT: define dso_local [2 x i64] @test11([2 x i64] %a.coerce)
+// HARD: define dso_local %struct.B @test11([2 x double] alignstack(8) 
%a.coerce)
+struct B test11(struct B a) { return a; }
+
+// Vector types are only available for targets with the correct hardware, and
+// their calling-convention is left undefined by the soft-float ABI, so they
+// aren't tested here.

>From b68ed60a3fe78bc0e9f57b45bed9dd78ed851904 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <[email protected]>
Date: Thu, 23 Nov 2023 11:31:28 +0000
Subject: [PATCH 4/4] [AArch64] Fix va_arg for soft-float

The AArch64 back-end already avoids saving the FP registers to the
va_list when the FP registers aren't present, but clang also needs to
know not to load them from the FP register save area when generating
code for va_arg.

The layout of va_list remains the same, but the vr_top and vr_offs
fields are unused.

Change-Id: I5d3dee1ac4a29f189432957910662939b79d9329
---
 clang/lib/CodeGen/Targets/AArch64.cpp       | 12 +++++++-----
 clang/test/CodeGen/aarch64-soft-float-abi.c | 18 ++++++++++++++++++
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp 
b/clang/lib/CodeGen/Targets/AArch64.cpp
index e290e8761a5f4..a4089bb6c70f1 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -53,8 +53,8 @@ class AArch64ABIInfo : public ABIInfo {
   Address EmitDarwinVAArg(Address VAListAddr, QualType Ty,
                           CodeGenFunction &CGF) const;
 
-  Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
-                         CodeGenFunction &CGF) const;
+  Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
+                         AArch64ABIKind Kind) const;
 
   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                     QualType Ty) const override {
@@ -65,7 +65,7 @@ class AArch64ABIInfo : public ABIInfo {
 
     return Kind == AArch64ABIKind::Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty)
            : isDarwinPCS()               ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
-                                         : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
+                                         : EmitAAPCSVAArg(VAListAddr, Ty, CGF, 
Kind);
   }
 
   Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
@@ -514,7 +514,8 @@ bool 
AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
 }
 
 Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
-                                       CodeGenFunction &CGF) const {
+                                       CodeGenFunction &CGF,
+                                       AArch64ABIKind Kind) const {
   ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true,
                                        CGF.CurFnInfo->getCallingConvention());
   // Empty records are ignored for parameter passing purposes.
@@ -539,7 +540,8 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, 
QualType Ty,
     BaseTy = ArrTy->getElementType();
     NumRegs = ArrTy->getNumElements();
   }
-  bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy();
+  bool IsFPR = Kind == AArch64ABIKind::AAPCS &&
+               (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());
 
   // The AArch64 va_list type and handling is specified in the Procedure Call
   // Standard, section B.4:
diff --git a/clang/test/CodeGen/aarch64-soft-float-abi.c 
b/clang/test/CodeGen/aarch64-soft-float-abi.c
index 0d80e82e6aef3..687647d650e1d 100644
--- a/clang/test/CodeGen/aarch64-soft-float-abi.c
+++ b/clang/test/CodeGen/aarch64-soft-float-abi.c
@@ -1,6 +1,9 @@
 // RUN: %clang_cc1 -triple aarch64 -target-feature +fp-armv8 -emit-llvm -o - 
%s | FileCheck %s --check-prefixes=CHECK,HARD
 // RUN: %clang_cc1 -triple aarch64 -target-feature -fp-armv8 -emit-llvm -o - 
%s | FileCheck %s --check-prefixes=CHECK,SOFT
 
+// The va_list type does not change between the ABIs
+// CHECK: %struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
 // Floats are passed in integer registers, this will be handled by the backend.
 // CHECK: define dso_local half @test0(half noundef %a)
 // CHECK: define dso_local bfloat @test1(bfloat noundef %a)
@@ -30,6 +33,21 @@ struct B {
 // HARD: define dso_local %struct.B @test11([2 x double] alignstack(8) 
%a.coerce)
 struct B test11(struct B a) { return a; }
 
+#include <stdarg.h>
+
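+// For variadic arguments, va_arg will always retrieve floating-point values from the general-purpose register save area (or the stack) under the soft-float ABI.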
+// CHECK-LABEL: define dso_local double @test20(i32 noundef %a, ...)
+// CHECK: %vl = alloca %struct.__va_list, align 8
+// SOFT: %gr_offs_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 
0, i32 3
+// SOFT: %reg_top_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 
0, i32 1
+// HARD: %vr_offs_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 
0, i32 4
+// HARD: %reg_top_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 
0, i32 2
+double test20(int a, ...) {
+  va_list vl;
+  va_start(vl, a);
+  return va_arg(vl, double);
+}
+
 // Vector types are only available for targets with the correct hardware, and
 // their calling-convention is left undefined by the soft-float ABI, so they
 // aren't tested here.

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [AArch64] Add soft-float ABI (PR #74460)

Reply via email to