[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From dafb3a378876db6b3bf505b425b386fd8f79c918 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/7] [IR] Add a test for `f128` libcall lowering (NFC)
`f128` intrinsic functions sometimes lower to `long double` library
calls when they instead need to be `f128` versions. Add a test
demonstrating current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 328 ++
1 file changed, 328 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 0..dfbd1eaeda109
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,328 @@
+; RUN: llc < %s -mtriple=aarch64-unknown-none -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=i686-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=s390x-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-musl -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+;
+; REQUIRES: aarch64-registered-target
+; REQUIRES: riscv-registered-target
+; REQUIRES: systemz-registered-target
+; REQUIRES: x86-registered-target
+;
+; Verify that fp128 intrinsics only lower to `long double` calls (e.g. `sinl`)
+; on platforms where `f128` and `long double` have the same layout, and
+; otherwise lower to `f128` versions (e.g. `sinf128`).
+;
+; Targets include:
+; * x86, x64 (80-bit long double)
+; * aarch64 (long double == f128)
+; * riscv32 (long double == f64)
+; * s390x (long double == f128, hardware support)
+; * A couple assorted environments for x86
+;
+; FIXME: only targets where long double is `f128` should be using `USELD`, all
+; others need to be NOTLD. PowerPC should be added but it currently emits an
+; interesting blend of both (e.g. `acosl` but `ceilf128`).
+
+define fp128 @test_acosf128(fp128 %a) {
+; CHECK-LABEL: test_acosf128:
+; CHECK-NOTLD: acosf128
+; CHECK-USELD: acosl
+; CHECK-S390X: acosl
+start:
+ %0 = tail call fp128 @llvm.acos.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_asinf128(fp128 %a) {
+; CHECK-LABEL: test_asinf128:
+; CHECK-NOTLD: asinf128
+; CHECK-USELD: asinl
+; CHECK-S390X: asinl
+start:
+ %0 = tail call fp128 @llvm.asin.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_atanf128(fp128 %a) {
+; CHECK-LABEL: test_atanf128:
+; CHECK-NOTLD: atanf128
+; CHECK-USELD: atanl
+; CHECK-S390X: atanl
+start:
+ %0 = tail call fp128 @llvm.atan.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL: test_ceilf128:
+; CHECK-NOTLD: ceilf128
+; CHECK-USELD: ceill
+; CHECK-S390X: ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; copysign should always get lowered to assembly
+; CHECK-LABEL: test_copysignf128:
+; CHECK-NOT:copysignl
+; CHECK-NOT:copysignf128
+start:
+ %0 = tail call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
+ ret fp128 %0
+}
+
+define fp128 @test_cosf128(fp128 %a) {
+; CHECK-LABEL: test_cosf128:
+; CHECK-NOTLD: cosf128
+; CHECK-USELD: cosl
+; CHECK-S390X: cosl
+start:
+ %0 = tail call fp128 @llvm.cos.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_exp10f128(fp128 %a) {
+; CHECK-LABEL: test_exp10f128:
+; CHECK-NOTLD: exp10f128
+; CHECK-USELD: exp10l
+; CHECK-S390X: exp10l
+start:
+ %0 = tail call fp128 @llvm.exp10.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_exp2f128(fp128 %a) {
+; CHECK-LABEL: test_exp2f128:
+; CHECK-NOTLD: exp2f128
+; CHECK-USELD: exp2l
+; CHECK-S390X: exp2l
+start:
+ %0 = tail call fp128 @llvm.exp2.f128(fp128 %a)
+ ret fp128 %0
+}
+
+
+define fp128 @test_expf128(fp128 %a) {
+; CHECK-LABEL: test_expf128:
+; CHECK-NOTLD: expf128
+; CHECK-USELD: expl
+; CHECK-S390X: expl
+start:
+ %0 = tail call fp128 @llvm.exp.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_fabsf128(fp128 %a) {
+; fabs should always get lowered to assembly
+; CHECK-LABEL: test_fabsf128:
+; CHECK-NOT:fabsl
+; CHECK-NOT:fabsf128
+start:
+ %0 = tail call fp128 @llvm.fabs.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_floorf128(fp128 %a) {
+; CHECK-LABEL: test
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From dafb3a378876db6b3bf505b425b386fd8f79c918 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/5] [IR] Add a test for `f128` libcall lowering (NFC)
`f128` intrinsic functions sometimes lower to `long double` library
calls when they instead need to be `f128` versions. Add a test
demonstrating current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 328 ++
1 file changed, 328 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 0..dfbd1eaeda109
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,328 @@
+; RUN: llc < %s -mtriple=aarch64-unknown-none -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=i686-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=s390x-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-musl -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+;
+; REQUIRES: aarch64-registered-target
+; REQUIRES: riscv-registered-target
+; REQUIRES: systemz-registered-target
+; REQUIRES: x86-registered-target
+;
+; Verify that fp128 intrinsics only lower to `long double` calls (e.g. `sinl`)
+; on platforms where `f128` and `long double` have the same layout, and
+; otherwise lower to `f128` versions (e.g. `sinf128`).
+;
+; Targets include:
+; * x86, x64 (80-bit long double)
+; * aarch64 (long double == f128)
+; * riscv32 (long double == f64)
+; * s390x (long double == f128, hardware support)
+; * A couple assorted environments for x86
+;
+; FIXME: only targets where long double is `f128` should be using `USELD`, all
+; others need to be NOTLD. PowerPC should be added but it currently emits an
+; interesting blend of both (e.g. `acosl` but `ceilf128`).
+
+define fp128 @test_acosf128(fp128 %a) {
+; CHECK-LABEL: test_acosf128:
+; CHECK-NOTLD: acosf128
+; CHECK-USELD: acosl
+; CHECK-S390X: acosl
+start:
+ %0 = tail call fp128 @llvm.acos.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_asinf128(fp128 %a) {
+; CHECK-LABEL: test_asinf128:
+; CHECK-NOTLD: asinf128
+; CHECK-USELD: asinl
+; CHECK-S390X: asinl
+start:
+ %0 = tail call fp128 @llvm.asin.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_atanf128(fp128 %a) {
+; CHECK-LABEL: test_atanf128:
+; CHECK-NOTLD: atanf128
+; CHECK-USELD: atanl
+; CHECK-S390X: atanl
+start:
+ %0 = tail call fp128 @llvm.atan.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL: test_ceilf128:
+; CHECK-NOTLD: ceilf128
+; CHECK-USELD: ceill
+; CHECK-S390X: ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; copysign should always get lowered to assembly
+; CHECK-LABEL: test_copysignf128:
+; CHECK-NOT:copysignl
+; CHECK-NOT:copysignf128
+start:
+ %0 = tail call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
+ ret fp128 %0
+}
+
+define fp128 @test_cosf128(fp128 %a) {
+; CHECK-LABEL: test_cosf128:
+; CHECK-NOTLD: cosf128
+; CHECK-USELD: cosl
+; CHECK-S390X: cosl
+start:
+ %0 = tail call fp128 @llvm.cos.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_exp10f128(fp128 %a) {
+; CHECK-LABEL: test_exp10f128:
+; CHECK-NOTLD: exp10f128
+; CHECK-USELD: exp10l
+; CHECK-S390X: exp10l
+start:
+ %0 = tail call fp128 @llvm.exp10.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_exp2f128(fp128 %a) {
+; CHECK-LABEL: test_exp2f128:
+; CHECK-NOTLD: exp2f128
+; CHECK-USELD: exp2l
+; CHECK-S390X: exp2l
+start:
+ %0 = tail call fp128 @llvm.exp2.f128(fp128 %a)
+ ret fp128 %0
+}
+
+
+define fp128 @test_expf128(fp128 %a) {
+; CHECK-LABEL: test_expf128:
+; CHECK-NOTLD: expf128
+; CHECK-USELD: expl
+; CHECK-S390X: expl
+start:
+ %0 = tail call fp128 @llvm.exp.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_fabsf128(fp128 %a) {
+; fabs should always get lowered to assembly
+; CHECK-LABEL: test_fabsf128:
+; CHECK-NOT:fabsl
+; CHECK-NOT:fabsf128
+start:
+ %0 = tail call fp128 @llvm.fabs.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_floorf128(fp128 %a) {
+; CHECK-LABEL: test
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From dafb3a378876db6b3bf505b425b386fd8f79c918 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add a test for `f128` libcall lowering (NFC)
`f128` intrinsic functions sometimes lower to `long double` library
calls when they instead need to be `f128` versions. Add a test
demonstrating current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 328 ++
1 file changed, 328 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 0..dfbd1eaeda109
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,328 @@
+; RUN: llc < %s -mtriple=aarch64-unknown-none -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=i686-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=s390x-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-musl -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+;
+; REQUIRES: aarch64-registered-target
+; REQUIRES: riscv-registered-target
+; REQUIRES: systemz-registered-target
+; REQUIRES: x86-registered-target
+;
+; Verify that fp128 intrinsics only lower to `long double` calls (e.g. `sinl`)
+; on platforms where `f128` and `long double` have the same layout, and
+; otherwise lower to `f128` versions (e.g. `sinf128`).
+;
+; Targets include:
+; * x86, x64 (80-bit long double)
+; * aarch64 (long double == f128)
+; * riscv32 (long double == f64)
+; * s390x (long double == f128, hardware support)
+; * A couple assorted environments for x86
+;
+; FIXME: only targets where long double is `f128` should be using `USELD`, all
+; others need to be NOTLD. PowerPC should be added but it currently emits an
+; interesting blend of both (e.g. `acosl` but `ceilf128`).
+
+define fp128 @test_acosf128(fp128 %a) {
+; CHECK-LABEL: test_acosf128:
+; CHECK-NOTLD: acosf128
+; CHECK-USELD: acosl
+; CHECK-S390X: acosl
+start:
+ %0 = tail call fp128 @llvm.acos.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_asinf128(fp128 %a) {
+; CHECK-LABEL: test_asinf128:
+; CHECK-NOTLD: asinf128
+; CHECK-USELD: asinl
+; CHECK-S390X: asinl
+start:
+ %0 = tail call fp128 @llvm.asin.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_atanf128(fp128 %a) {
+; CHECK-LABEL: test_atanf128:
+; CHECK-NOTLD: atanf128
+; CHECK-USELD: atanl
+; CHECK-S390X: atanl
+start:
+ %0 = tail call fp128 @llvm.atan.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL: test_ceilf128:
+; CHECK-NOTLD: ceilf128
+; CHECK-USELD: ceill
+; CHECK-S390X: ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; copysign should always get lowered to assembly
+; CHECK-LABEL: test_copysignf128:
+; CHECK-NOT:copysignl
+; CHECK-NOT:copysignf128
+start:
+ %0 = tail call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
+ ret fp128 %0
+}
+
+define fp128 @test_cosf128(fp128 %a) {
+; CHECK-LABEL: test_cosf128:
+; CHECK-NOTLD: cosf128
+; CHECK-USELD: cosl
+; CHECK-S390X: cosl
+start:
+ %0 = tail call fp128 @llvm.cos.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_exp10f128(fp128 %a) {
+; CHECK-LABEL: test_exp10f128:
+; CHECK-NOTLD: exp10f128
+; CHECK-USELD: exp10l
+; CHECK-S390X: exp10l
+start:
+ %0 = tail call fp128 @llvm.exp10.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_exp2f128(fp128 %a) {
+; CHECK-LABEL: test_exp2f128:
+; CHECK-NOTLD: exp2f128
+; CHECK-USELD: exp2l
+; CHECK-S390X: exp2l
+start:
+ %0 = tail call fp128 @llvm.exp2.f128(fp128 %a)
+ ret fp128 %0
+}
+
+
+define fp128 @test_expf128(fp128 %a) {
+; CHECK-LABEL: test_expf128:
+; CHECK-NOTLD: expf128
+; CHECK-USELD: expl
+; CHECK-S390X: expl
+start:
+ %0 = tail call fp128 @llvm.exp.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_fabsf128(fp128 %a) {
+; fabs should always get lowered to assembly
+; CHECK-LABEL: test_fabsf128:
+; CHECK-NOT:fabsl
+; CHECK-NOT:fabsf128
+start:
+ %0 = tail call fp128 @llvm.fabs.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_floorf128(fp128 %a) {
+; CHECK-LABEL: test
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From dafb3a378876db6b3bf505b425b386fd8f79c918 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/3] [IR] Add a test for `f128` libcall lowering (NFC)
`f128` intrinsic functions sometimes lower to `long double` library
calls when they instead need to be `f128` versions. Add a test
demonstrating current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 328 ++
1 file changed, 328 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 0..dfbd1eaeda109
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,328 @@
+; RUN: llc < %s -mtriple=aarch64-unknown-none -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=i686-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=s390x-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-musl -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+;
+; REQUIRES: aarch64-registered-target
+; REQUIRES: riscv-registered-target
+; REQUIRES: systemz-registered-target
+; REQUIRES: x86-registered-target
+;
+; Verify that fp128 intrinsics only lower to `long double` calls (e.g. `sinl`)
+; on platforms where `f128` and `long double` have the same layout, and
+; otherwise lower to `f128` versions (e.g. `sinf128`).
+;
+; Targets include:
+; * x86, x64 (80-bit long double)
+; * aarch64 (long double == f128)
+; * riscv32 (long double == f64)
+; * s390x (long double == f128, hardware support)
+; * A couple assorted environments for x86
+;
+; FIXME: only targets where long double is `f128` should be using `USELD`, all
+; others need to be NOTLD. PowerPC should be added but it currently emits an
+; interesting blend of both (e.g. `acosl` but `ceilf128`).
+
+define fp128 @test_acosf128(fp128 %a) {
+; CHECK-LABEL: test_acosf128:
+; CHECK-NOTLD: acosf128
+; CHECK-USELD: acosl
+; CHECK-S390X: acosl
+start:
+ %0 = tail call fp128 @llvm.acos.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_asinf128(fp128 %a) {
+; CHECK-LABEL: test_asinf128:
+; CHECK-NOTLD: asinf128
+; CHECK-USELD: asinl
+; CHECK-S390X: asinl
+start:
+ %0 = tail call fp128 @llvm.asin.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_atanf128(fp128 %a) {
+; CHECK-LABEL: test_atanf128:
+; CHECK-NOTLD: atanf128
+; CHECK-USELD: atanl
+; CHECK-S390X: atanl
+start:
+ %0 = tail call fp128 @llvm.atan.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL: test_ceilf128:
+; CHECK-NOTLD: ceilf128
+; CHECK-USELD: ceill
+; CHECK-S390X: ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; copysign should always get lowered to assembly
+; CHECK-LABEL: test_copysignf128:
+; CHECK-NOT:copysignl
+; CHECK-NOT:copysignf128
+start:
+ %0 = tail call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
+ ret fp128 %0
+}
+
+define fp128 @test_cosf128(fp128 %a) {
+; CHECK-LABEL: test_cosf128:
+; CHECK-NOTLD: cosf128
+; CHECK-USELD: cosl
+; CHECK-S390X: cosl
+start:
+ %0 = tail call fp128 @llvm.cos.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_exp10f128(fp128 %a) {
+; CHECK-LABEL: test_exp10f128:
+; CHECK-NOTLD: exp10f128
+; CHECK-USELD: exp10l
+; CHECK-S390X: exp10l
+start:
+ %0 = tail call fp128 @llvm.exp10.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_exp2f128(fp128 %a) {
+; CHECK-LABEL: test_exp2f128:
+; CHECK-NOTLD: exp2f128
+; CHECK-USELD: exp2l
+; CHECK-S390X: exp2l
+start:
+ %0 = tail call fp128 @llvm.exp2.f128(fp128 %a)
+ ret fp128 %0
+}
+
+
+define fp128 @test_expf128(fp128 %a) {
+; CHECK-LABEL: test_expf128:
+; CHECK-NOTLD: expf128
+; CHECK-USELD: expl
+; CHECK-S390X: expl
+start:
+ %0 = tail call fp128 @llvm.exp.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_fabsf128(fp128 %a) {
+; fabs should always get lowered to assembly
+; CHECK-LABEL: test_fabsf128:
+; CHECK-NOT:fabsl
+; CHECK-NOT:fabsf128
+start:
+ %0 = tail call fp128 @llvm.fabs.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_floorf128(fp128 %a) {
+; CHECK-LABEL: test
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From 9089a966f2107a2dfcad86fec746e9f2bd8a105b Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/5] [IR] Add a test for `f128` libcall lowering (NFC)
`f128` intrinsic functions sometimes lower to `long double` library
calls when they instead need to be `f128` versions. Add a test
demonstrating current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 328 ++
1 file changed, 328 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 0..dfbd1eaeda109
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,328 @@
+; RUN: llc < %s -mtriple=aarch64-unknown-none -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=i686-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=s390x-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-musl -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD
+;
+; REQUIRES: aarch64-registered-target
+; REQUIRES: riscv-registered-target
+; REQUIRES: systemz-registered-target
+; REQUIRES: x86-registered-target
+;
+; Verify that fp128 intrinsics only lower to `long double` calls (e.g. `sinl`)
+; on platforms where `f128` and `long double` have the same layout, and
+; otherwise lower to `f128` versions (e.g. `sinf128`).
+;
+; Targets include:
+; * x86, x64 (80-bit long double)
+; * aarch64 (long double == f128)
+; * riscv32 (long double == f64)
+; * s390x (long double == f128, hardware support)
+; * A couple assorted environments for x86
+;
+; FIXME: only targets where long double is `f128` should be using `USELD`, all
+; others need to be NOTLD. PowerPC should be added but it currently emits an
+; interesting blend of both (e.g. `acosl` but `ceilf128`).
+
+define fp128 @test_acosf128(fp128 %a) {
+; CHECK-LABEL: test_acosf128:
+; CHECK-NOTLD: acosf128
+; CHECK-USELD: acosl
+; CHECK-S390X: acosl
+start:
+ %0 = tail call fp128 @llvm.acos.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_asinf128(fp128 %a) {
+; CHECK-LABEL: test_asinf128:
+; CHECK-NOTLD: asinf128
+; CHECK-USELD: asinl
+; CHECK-S390X: asinl
+start:
+ %0 = tail call fp128 @llvm.asin.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_atanf128(fp128 %a) {
+; CHECK-LABEL: test_atanf128:
+; CHECK-NOTLD: atanf128
+; CHECK-USELD: atanl
+; CHECK-S390X: atanl
+start:
+ %0 = tail call fp128 @llvm.atan.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL: test_ceilf128:
+; CHECK-NOTLD: ceilf128
+; CHECK-USELD: ceill
+; CHECK-S390X: ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; copysign should always get lowered to assembly
+; CHECK-LABEL: test_copysignf128:
+; CHECK-NOT:copysignl
+; CHECK-NOT:copysignf128
+start:
+ %0 = tail call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
+ ret fp128 %0
+}
+
+define fp128 @test_cosf128(fp128 %a) {
+; CHECK-LABEL: test_cosf128:
+; CHECK-NOTLD: cosf128
+; CHECK-USELD: cosl
+; CHECK-S390X: cosl
+start:
+ %0 = tail call fp128 @llvm.cos.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_exp10f128(fp128 %a) {
+; CHECK-LABEL: test_exp10f128:
+; CHECK-NOTLD: exp10f128
+; CHECK-USELD: exp10l
+; CHECK-S390X: exp10l
+start:
+ %0 = tail call fp128 @llvm.exp10.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_exp2f128(fp128 %a) {
+; CHECK-LABEL: test_exp2f128:
+; CHECK-NOTLD: exp2f128
+; CHECK-USELD: exp2l
+; CHECK-S390X: exp2l
+start:
+ %0 = tail call fp128 @llvm.exp2.f128(fp128 %a)
+ ret fp128 %0
+}
+
+
+define fp128 @test_expf128(fp128 %a) {
+; CHECK-LABEL: test_expf128:
+; CHECK-NOTLD: expf128
+; CHECK-USELD: expl
+; CHECK-S390X: expl
+start:
+ %0 = tail call fp128 @llvm.exp.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_fabsf128(fp128 %a) {
+; fabs should always get lowered to assembly
+; CHECK-LABEL: test_fabsf128:
+; CHECK-NOT:fabsl
+; CHECK-NOT:fabsf128
+start:
+ %0 = tail call fp128 @llvm.fabs.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_floorf128(fp128 %a) {
+; CHECK-LABEL: test
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
tgross35 wrote: Talked to arsenm on discord, long discussion starting around here https://discord.com/channels/636084430946959380/636732535434510338/1362207130559578185. The outcome is that this is effectively a target option and needs to be tied to the triple rather than per-module. Which makes sense and avoids the above problem. So, I'll be doing the following: 1. Make LLVM assume that `sqrtf128` (and similar) libcalls are available by default 2. On 64-bit arm, loongarch, mips, risc-v, and s390x musl targets, use `sqrtl` instead 3. Add some way to make musl targets also use `sqrtf128`, like `-nolongdouble` in the target triple This should work because calling `sqrtf128` is correct on _most_ platforms: * On Windows, Apple, and 32-bit platforms, `long double` is `f64` so `sqrtf128` is the only correct call * On x86, `long double` is the x87 80-bit float so `sqrtf128` is the only correct call * On anything glibc, `sqrtf128` is an alias to `sqrtl` on platforms where that works, so `sqrtf128` can always be called * That leaves 64-bit `musl` on platforms where `long double` is `f128` as the only platforms where `sqrtl` has to be called (otherwise calling `sqrtl` from C would get intercepted and relowered as `sqrtf128` for a linker error) https://github.com/llvm/llvm-project/pull/76558 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
@@ -0,0 +1,331 @@ +; RUN: llc < %s -mtriple=aarch64-unknown-none -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-USELD +; RUN: llc < %s -mtriple=i686-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-NOTLD +; RUN: llc < %s -mtriple=powerpc-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-NOTLD +; RUN: llc < %s -mtriple=powerpc64-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-NOTLD +; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-NOTLD +; RUN: llc < %s -mtriple=s390x-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-S390X +; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-NOTLD +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-NOTLD +; RUN: llc < %s -mtriple=x86_64-unknown-linux-musl -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-NOTLD +; +; REQUIRES: aarch64-registered-target +; REQUIRES: powerpc-registered-target +; REQUIRES: riscv-registered-target +; REQUIRES: systemz-registered-target +; REQUIRES: x86-registered-target tgross35 wrote: Todo: replace this with `%if` somehow so this test still runs if only a subset of architectures is available https://llvm.org/docs/TestingGuide.html https://github.com/llvm/llvm-project/pull/76558 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
tgross35 wrote: In either case, I need to have the module flags available pretty early and I'm not sure how to do that. Ideally they would be available when `TargetLowering` is constructed or sometime before it is used for lowering, but it only gets a `TargetMachine` as a parameter. All values in `TargetOptions` seem to be configured once and don't pay attention to module flags or take the module as a parameter - is there a reason for that? I'm wondering if `TargetMachine` is intended to be unchanging across different modules. https://github.com/llvm/llvm-project/pull/76558 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
tgross35 wrote: Finally getting around to this after more than a year. @efriedma-quic as an alternative to the current implementation of duplicating `long double` layout information from Clang to LLVM, would it work if LLVM lowers to `*f128` calls but provides a module flag `fp128_use_long_double_libcalls` to prefer the `*l` versions? So if Clang or other frontends know that their `long double` is `_Float128`, it can select those libcalls. The advantage is avoided code duplication and the logic is easier to follow. Also this avoids problems if linking a library built with an unexpected `-mlong-double-` configuration. The disadvantage is that frontends that don't know about C's `long double` can't benefit from the more common `*l` symbols. I don't think this is too big of a problem though: it makes no difference with glibc (the f128 aliases have been around sufficiently long) or on any platforms where `long double` is not `_Float128`. And it is easy enough for frontends to set `fp128_use_long_double_libcalls` on a case-by-case basis if they know what math library is being used (e.g. aarch64 musl). (I handle the f128 support for Rust and would much rather never think about `*l` symbols, I can alias them to `*f128` if needed or set the flag) https://github.com/llvm/llvm-project/pull/76558 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From 31405591b5661156348ec7a45e66eb43e0ace15b Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/6] [IR] Add a test for `f128` libcall lowering (nfc)
`f128` intrinsic functions sometimes lower to `long double` library
calls when they instead need to be `f128` versions. Add a test
demonstrating current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 328 ++
1 file changed, 328 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 0..dfbd1eaeda109
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,328 @@
+; RUN: llc < %s -mtriple=aarch64-unknown-none -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=i686-unknown -verify-machineinstrs | FileCheck %s
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=s390x-unknown -verify-machineinstrs | FileCheck %s
--check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-musl -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-USELD
+;
+; REQUIRES: aarch64-registered-target
+; REQUIRES: riscv-registered-target
+; REQUIRES: systemz-registered-target
+; REQUIRES: x86-registered-target
+;
+; Verify that fp128 intrinsics only lower to `long double` calls (e.g. `sinl`)
+; on platforms where `f128` and `long double` have the same layout, and
+; otherwise lower to `f128` versions (e.g. `sinf128`).
+;
+; Targets include:
+; * x86, x64 (80-bit long double)
+; * aarch64 (long double == f128)
+; * riscv32 (long double == f64)
+; * s390x (long double == f128, hardware support)
+; * A couple assorted environments for x86
+;
+; FIXME: only targets where long double is `f128` should be using `USELD`, all
+; others need to be NOTLD. PowerPC should be added but it currently emits an
+; interesting blend of both (e.g. `acosl` but `ceilf128`).
+
+define fp128 @test_acosf128(fp128 %a) {
+; CHECK-LABEL: test_acosf128:
+; CHECK-NOTLD: acosf128
+; CHECK-USELD: acosl
+; CHECK-S390X: acosl
+start:
+ %0 = tail call fp128 @llvm.acos.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_asinf128(fp128 %a) {
+; CHECK-LABEL: test_asinf128:
+; CHECK-NOTLD: asinf128
+; CHECK-USELD: asinl
+; CHECK-S390X: asinl
+start:
+ %0 = tail call fp128 @llvm.asin.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_atanf128(fp128 %a) {
+; CHECK-LABEL: test_atanf128:
+; CHECK-NOTLD: atanf128
+; CHECK-USELD: atanl
+; CHECK-S390X: atanl
+start:
+ %0 = tail call fp128 @llvm.atan.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL: test_ceilf128:
+; CHECK-NOTLD: ceilf128
+; CHECK-USELD: ceill
+; CHECK-S390X: ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; copysign should always get lowered to assembly
+; CHECK-LABEL: test_copysignf128:
+; CHECK-NOT:copysignl
+; CHECK-NOT:copysignf128
+start:
+ %0 = tail call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
+ ret fp128 %0
+}
+
+define fp128 @test_cosf128(fp128 %a) {
+; CHECK-LABEL: test_cosf128:
+; CHECK-NOTLD: cosf128
+; CHECK-USELD: cosl
+; CHECK-S390X: cosl
+start:
+ %0 = tail call fp128 @llvm.cos.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_exp10f128(fp128 %a) {
+; CHECK-LABEL: test_exp10f128:
+; CHECK-NOTLD: exp10f128
+; CHECK-USELD: exp10l
+; CHECK-S390X: exp10l
+start:
+ %0 = tail call fp128 @llvm.exp10.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_exp2f128(fp128 %a) {
+; CHECK-LABEL: test_exp2f128:
+; CHECK-NOTLD: exp2f128
+; CHECK-USELD: exp2l
+; CHECK-S390X: exp2l
+start:
+ %0 = tail call fp128 @llvm.exp2.f128(fp128 %a)
+ ret fp128 %0
+}
+
+
+define fp128 @test_expf128(fp128 %a) {
+; CHECK-LABEL: test_expf128:
+; CHECK-NOTLD: expf128
+; CHECK-USELD: expl
+; CHECK-S390X: expl
+start:
+ %0 = tail call fp128 @llvm.exp.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_fabsf128(fp128 %a) {
+; fabs should always get lowered to assembly
+; CHECK-LABEL: test_fabsf128:
+; CHECK-NOT:fabsl
+; CHECK-NOT:fabsf128
+start:
+ %0 = tail call fp128 @llvm.fabs.f128(fp128 %a)
+ ret fp128 %0
+}
+
+define fp128 @test_floorf128(fp128 %a) {
+; CHECK-LABEL: test
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From f110337467d5a2b1f624eab507daa2bc854def17 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering
(nfc)
`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 610 ++
1 file changed, 610 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 0..8a70786d97fe6
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL: test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X: brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+ %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL: test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X: brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64: // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32: # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X: # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86: # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
efriedma-quic wrote: Putting a function in TargetMachine seems reasonable. https://github.com/llvm/llvm-project/pull/76558 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
tgross35 wrote: I'm struggling a bit with how to handle ABI information since that affects layout (e.g. ARM aapcs), which I think explains most of the errors in https://buildkite.com/llvm-project/github-pull-requests/builds/31198#018d26e2-fd17-4e15-a1eb-08580c189056. This needs to be available at TargetLoweringBase::InitLibcalls, which calls [`getCLayouts`](https://github.com/llvm/llvm-project/blob/cb3bf7540cf9b797575c625318e47a33f7514fad/llvm/lib/TargetParser/Triple.cpp#L1945). TargetMachine is available at that time, so would it be better to move CLayouts from Triple to TargetMachine? If so, subclasses could be used rather than the if block, which more closely follows the Clang side. Also, are there currently any module flags that make it to TargetLowering? Looking for a reference on how to get the -mlong-double-128 information. https://github.com/llvm/llvm-project/pull/76558 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From e869ad1bc601d95b6364dc5619e79a06e8b0fc82 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/5] [IR] Add an xpassing test for `f128` intrinsic lowering
`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 610 ++
1 file changed, 610 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL: test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X: brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+ %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL: test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X: brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64: // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32: # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X: # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86: # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From e869ad1bc601d95b6364dc5619e79a06e8b0fc82 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering
`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 610 ++
1 file changed, 610 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL: test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X: brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+ %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL: test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X: brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64: // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32: # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X: # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86: # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From 90a465d0a7e9744a4a8043152016e500927a0d95 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering
`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 610 ++
1 file changed, 610 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL: test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X: brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+ %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL: test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X: brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64: // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32: # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X: # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86: # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From 90a465d0a7e9744a4a8043152016e500927a0d95 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering
`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 610 ++
1 file changed, 610 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL: test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X: brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+ %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL: test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X: brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64: // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32: # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X: # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86: # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From 90a465d0a7e9744a4a8043152016e500927a0d95 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering
`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 610 ++
1 file changed, 610 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; Records what each target emits for a tail call to @llvm.cbrt.f128.
+; Every expectation below names the symbol llvm.cbrt.f128 itself: a branch
+; on aarch64, a jmp on x64, and a call-style instruction on riscv32, s390x
+; and x86.
+; NOTE(review): the expected symbol is the IR declaration's own name, which
+; suggests it is treated as an ordinary external function rather than being
+; lowered to a math-library routine -- confirm this is intended.
+; NOTE(review): no RUN line in this file's header enables the unsuffixed
+; check prefix, so the `-LABEL` directive directly below is presumably never
+; evaluated -- confirm.
+; CHECK-LABEL: test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X: brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+ %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; Records what each target emits for a tail call to @llvm.ceil.f128.
+; All five expectations below name ceill: a branch on aarch64, a jmp on x64,
+; and a call-style instruction on riscv32, s390x and x86.
+; NOTE(review): ceill is presumably the `long double` routine that the FIXME
+; in this file's header says should become an f128 call -- confirm.
+; NOTE(review): no RUN line in this file's header enables the unsuffixed
+; check prefix, so the `-LABEL` directive directly below is presumably never
+; evaluated -- confirm.
+; CHECK-LABEL: test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X: brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64: // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32: # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X: # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86: # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From 946581e0c6a06be92b16d74199b58a72be4b76f3 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering
`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 610 ++
1 file changed, 610 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; Records what each target emits for a tail call to @llvm.cbrt.f128.
+; Every expectation below names the symbol llvm.cbrt.f128 itself: a branch
+; on aarch64, a jmp on x64, and a call-style instruction on riscv32, s390x
+; and x86.
+; NOTE(review): the expected symbol is the IR declaration's own name, which
+; suggests it is treated as an ordinary external function rather than being
+; lowered to a math-library routine -- confirm this is intended.
+; NOTE(review): no RUN line in this file's header enables the unsuffixed
+; check prefix, so the `-LABEL` directive directly below is presumably never
+; evaluated -- confirm.
+; CHECK-LABEL: test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X: brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+ %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; Records what each target emits for a tail call to @llvm.ceil.f128.
+; All five expectations below name ceill: a branch on aarch64, a jmp on x64,
+; and a call-style instruction on riscv32, s390x and x86.
+; NOTE(review): ceill is presumably the `long double` routine that the FIXME
+; in this file's header says should become an f128 call -- confirm.
+; NOTE(review): no RUN line in this file's header enables the unsuffixed
+; check prefix, so the `-LABEL` directive directly below is presumably never
+; evaluated -- confirm.
+; CHECK-LABEL: test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X: brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64: // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32: # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X: # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86: # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From 590f4920ceb1a80d711d39624b0249cd9ff774d2 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering
`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 610 ++
1 file changed, 610 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; Records what each target emits for a tail call to @llvm.cbrt.f128.
+; Every expectation below names the symbol llvm.cbrt.f128 itself: a branch
+; on aarch64, a jmp on x64, and a call-style instruction on riscv32, s390x
+; and x86.
+; NOTE(review): the expected symbol is the IR declaration's own name, which
+; suggests it is treated as an ordinary external function rather than being
+; lowered to a math-library routine -- confirm this is intended.
+; NOTE(review): no RUN line in this file's header enables the unsuffixed
+; check prefix, so the `-LABEL` directive directly below is presumably never
+; evaluated -- confirm.
+; CHECK-LABEL: test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X: brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+ %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; Records what each target emits for a tail call to @llvm.ceil.f128.
+; All five expectations below name ceill: a branch on aarch64, a jmp on x64,
+; and a call-style instruction on riscv32, s390x and x86.
+; NOTE(review): ceill is presumably the `long double` routine that the FIXME
+; in this file's header says should become an f128 call -- confirm.
+; NOTE(review): no RUN line in this file's header enables the unsuffixed
+; check prefix, so the `-LABEL` directive directly below is presumably never
+; evaluated -- confirm.
+; CHECK-LABEL: test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X: brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64: // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32: # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X: # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86: # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From 590f4920ceb1a80d711d39624b0249cd9ff774d2 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering
`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 610 ++
1 file changed, 610 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; Records what each target emits for a tail call to @llvm.cbrt.f128.
+; Every expectation below names the symbol llvm.cbrt.f128 itself: a branch
+; on aarch64, a jmp on x64, and a call-style instruction on riscv32, s390x
+; and x86.
+; NOTE(review): the expected symbol is the IR declaration's own name, which
+; suggests it is treated as an ordinary external function rather than being
+; lowered to a math-library routine -- confirm this is intended.
+; NOTE(review): no RUN line in this file's header enables the unsuffixed
+; check prefix, so the `-LABEL` directive directly below is presumably never
+; evaluated -- confirm.
+; CHECK-LABEL: test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X: brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+ %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; Records what each target emits for a tail call to @llvm.ceil.f128.
+; All five expectations below name ceill: a branch on aarch64, a jmp on x64,
+; and a call-style instruction on riscv32, s390x and x86.
+; NOTE(review): ceill is presumably the `long double` routine that the FIXME
+; in this file's header says should become an f128 call -- confirm.
+; NOTE(review): no RUN line in this file's header enables the unsuffixed
+; check prefix, so the `-LABEL` directive directly below is presumably never
+; evaluated -- confirm.
+; CHECK-LABEL: test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X: brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64: // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32: # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X: # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86: # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From 590f4920ceb1a80d711d39624b0249cd9ff774d2 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering
`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 610 ++
1 file changed, 610 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; Records what each target emits for a tail call to @llvm.cbrt.f128.
+; Every expectation below names the symbol llvm.cbrt.f128 itself: a branch
+; on aarch64, a jmp on x64, and a call-style instruction on riscv32, s390x
+; and x86.
+; NOTE(review): the expected symbol is the IR declaration's own name, which
+; suggests it is treated as an ordinary external function rather than being
+; lowered to a math-library routine -- confirm this is intended.
+; NOTE(review): no RUN line in this file's header enables the unsuffixed
+; check prefix, so the `-LABEL` directive directly below is presumably never
+; evaluated -- confirm.
+; CHECK-LABEL: test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X: brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+ %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; Records what each target emits for a tail call to @llvm.ceil.f128.
+; All five expectations below name ceill: a branch on aarch64, a jmp on x64,
+; and a call-style instruction on riscv32, s390x and x86.
+; NOTE(review): ceill is presumably the `long double` routine that the FIXME
+; in this file's header says should become an f128 call -- confirm.
+; NOTE(review): no RUN line in this file's header enables the unsuffixed
+; check prefix, so the `-LABEL` directive directly below is presumably never
+; evaluated -- confirm.
+; CHECK-LABEL: test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X: brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64: // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32: # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X: # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86: # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From 590f4920ceb1a80d711d39624b0249cd9ff774d2 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering
`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 610 ++
1 file changed, 610 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; Records what each target emits for a tail call to @llvm.cbrt.f128.
+; Every expectation below names the symbol llvm.cbrt.f128 itself: a branch
+; on aarch64, a jmp on x64, and a call-style instruction on riscv32, s390x
+; and x86.
+; NOTE(review): the expected symbol is the IR declaration's own name, which
+; suggests it is treated as an ordinary external function rather than being
+; lowered to a math-library routine -- confirm this is intended.
+; NOTE(review): no RUN line in this file's header enables the unsuffixed
+; check prefix, so the `-LABEL` directive directly below is presumably never
+; evaluated -- confirm.
+; CHECK-LABEL: test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X: brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+ %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; Records what each target emits for a tail call to @llvm.ceil.f128.
+; All five expectations below name ceill: a branch on aarch64, a jmp on x64,
+; and a call-style instruction on riscv32, s390x and x86.
+; NOTE(review): ceill is presumably the `long double` routine that the FIXME
+; in this file's header says should become an f128 call -- confirm.
+; NOTE(review): no RUN line in this file's header enables the unsuffixed
+; check prefix, so the `-LABEL` directive directly below is presumably never
+; evaluated -- confirm.
+; CHECK-LABEL: test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X: brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64: // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32: # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X: # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86: # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From 590f4920ceb1a80d711d39624b0249cd9ff774d2 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering
`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 610 ++
1 file changed, 610 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs |
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; Records what each target emits for a tail call to @llvm.cbrt.f128.
+; Every expectation below names the symbol llvm.cbrt.f128 itself: a branch
+; on aarch64, a jmp on x64, and a call-style instruction on riscv32, s390x
+; and x86.
+; NOTE(review): the expected symbol is the IR declaration's own name, which
+; suggests it is treated as an ordinary external function rather than being
+; lowered to a math-library routine -- confirm this is intended.
+; NOTE(review): no RUN line in this file's header enables the unsuffixed
+; check prefix, so the `-LABEL` directive directly below is presumably never
+; evaluated -- confirm.
+; CHECK-LABEL: test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X: brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+ %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; Records what each target emits for a tail call to @llvm.ceil.f128.
+; All five expectations below name ceill: a branch on aarch64, a jmp on x64,
+; and a call-style instruction on riscv32, s390x and x86.
+; NOTE(review): ceill is presumably the `long double` routine that the FIXME
+; in this file's header says should become an f128 call -- confirm.
+; NOTE(review): no RUN line in this file's header enables the unsuffixed
+; check prefix, so the `-LABEL` directive directly below is presumably never
+; evaluated -- confirm.
+; CHECK-LABEL: test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X: brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64: // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32: # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X: # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86: # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From 590f4920ceb1a80d711d39624b0249cd9ff774d2 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering
`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 610 ++
1 file changed, 610 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+; Lowering test for a tail call to `@llvm.cbrt.f128` on an fp128 argument.
+; Each target prefix carries its own -LABEL directive: the RUN lines enable
+; only the per-target prefixes, so a bare `CHECK-LABEL` is never evaluated and
+; would leave the checks below unanchored to this function.
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-AARCH64-LABEL: test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32-LABEL: test_cbrtf128:
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X-LABEL: test_cbrtf128:
+; CHECK-S390X: brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64-LABEL: test_cbrtf128:
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86-LABEL: test_cbrtf128:
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+ %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+; Lowering test for `@llvm.ceil.f128`: records the libcall symbol each target
+; currently emits (`ceill`).
+; Each target prefix carries its own -LABEL directive: the RUN lines enable
+; only the per-target prefixes, so a bare `CHECK-LABEL` is never evaluated and
+; would leave the checks below unanchored to this function.
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-AARCH64-LABEL: test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32-LABEL: test_ceilf128:
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X-LABEL: test_ceilf128:
+; CHECK-S390X: brasl {{%.*}} ceill@PLT
+; CHECK-X64-LABEL: test_ceilf128:
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86-LABEL: test_ceilf128:
+; CHECK-X86:calll ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64: // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32: # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X: # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86: # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
https://github.com/tgross35 updated
https://github.com/llvm/llvm-project/pull/76558
>From 7df4ef93989b1913d9200fbc29d6d04f9e59d51a Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering
`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
.../CodeGen/Generic/f128-math-lowering.ll | 610 ++
1 file changed, 610 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll
diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..30efb8ef34918e
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+;;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+; Lowering test for a tail call to `@llvm.cbrt.f128` on an fp128 argument.
+; Each target prefix carries its own -LABEL directive: the RUN lines enable
+; only the per-target prefixes, so a bare `CHECK-LABEL` is never evaluated and
+; would leave the checks below unanchored to this function.
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-AARCH64-LABEL: test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32-LABEL: test_cbrtf128:
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X-LABEL: test_cbrtf128:
+; CHECK-S390X: brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64-LABEL: test_cbrtf128:
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86-LABEL: test_cbrtf128:
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+ %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+; Lowering test for `@llvm.ceil.f128`: records the libcall symbol each target
+; currently emits (`ceill`).
+; Each target prefix carries its own -LABEL directive: the RUN lines enable
+; only the per-target prefixes, so a bare `CHECK-LABEL` is never evaluated and
+; would leave the checks below unanchored to this function.
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-AARCH64-LABEL: test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32-LABEL: test_ceilf128:
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X-LABEL: test_ceilf128:
+; CHECK-S390X: brasl {{%.*}} ceill@PLT
+; CHECK-X64-LABEL: test_ceilf128:
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86-LABEL: test_ceilf128:
+; CHECK-X86:calll ceill
+start:
+ %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64: // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32: # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X: # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86: # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-
[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)
tgross35 wrote: @efriedma-quic was looking at this on phabricator https://github.com/llvm/llvm-project/pull/76558 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
