[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2025-04-27 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From dafb3a378876db6b3bf505b425b386fd8f79c918 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/7] [IR] Add a test for `f128` libcall lowering (NFC)

`f128` intrinsic functions sometimes lower to `long double` library
calls when they instead need to be `f128` versions. Add a test
demonstrating current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 328 ++
 1 file changed, 328 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 0..dfbd1eaeda109
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,328 @@
+; RUN: llc < %s -mtriple=aarch64-unknown-none -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=i686-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=s390x-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-musl -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-USELD
+;
+; REQUIRES: aarch64-registered-target
+; REQUIRES: riscv-registered-target
+; REQUIRES: systemz-registered-target
+; REQUIRES: x86-registered-target
+;
+; Verify that fp128 intrinsics only lower to `long double` calls (e.g. `sinl`)
+; on platforms where `f128` and `long double` have the same layout, and
+; otherwise lower to `f128` versions (e.g. `sinf128`).
+;
+; Targets include:
+; * x86, x64 (80-bit long double)
+; * aarch64 (long double == f128)
+; * riscv32 (long double == f64)
+; * s390x (long double == f128, hardware support)
+; * A couple assorted environments for x86
+;
+; FIXME: only targets where long double is `f128` should be using `USELD`, all
+; others need to be NOTLD. PowerPC should be added but it currently emits an
+; interesting blend of both (e.g. `acosl` but `ceilf128`).
+
+define fp128 @test_acosf128(fp128 %a) {
+; CHECK-LABEL:  test_acosf128:
+; CHECK-NOTLD:  acosf128
+; CHECK-USELD:  acosl
+; CHECK-S390X:  acosl
+start:
+  %0 = tail call fp128 @llvm.acos.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_asinf128(fp128 %a) {
+; CHECK-LABEL:  test_asinf128:
+; CHECK-NOTLD:  asinf128
+; CHECK-USELD:  asinl
+; CHECK-S390X:  asinl
+start:
+  %0 = tail call fp128 @llvm.asin.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_atanf128(fp128 %a) {
+; CHECK-LABEL:  test_atanf128:
+; CHECK-NOTLD:  atanf128
+; CHECK-USELD:  atanl
+; CHECK-S390X:  atanl
+start:
+  %0 = tail call fp128 @llvm.atan.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-NOTLD:  ceilf128
+; CHECK-USELD:  ceill
+; CHECK-S390X:  ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; copysign should always get lowered to assembly
+; CHECK-LABEL:  test_copysignf128:
+; CHECK-NOT:copysignl
+; CHECK-NOT:copysignf128
+start:
+  %0 = tail call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
+  ret fp128 %0
+}
+
+define fp128 @test_cosf128(fp128 %a) {
+; CHECK-LABEL:  test_cosf128:
+; CHECK-NOTLD:  cosf128
+; CHECK-USELD:  cosl
+; CHECK-S390X:  cosl
+start:
+  %0 = tail call fp128 @llvm.cos.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_exp10f128(fp128 %a) {
+; CHECK-LABEL:  test_exp10f128:
+; CHECK-NOTLD:  exp10f128
+; CHECK-USELD:  exp10l
+; CHECK-S390X:  exp10l
+start:
+  %0 = tail call fp128 @llvm.exp10.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_exp2f128(fp128 %a) {
+; CHECK-LABEL:  test_exp2f128:
+; CHECK-NOTLD:  exp2f128
+; CHECK-USELD:  exp2l
+; CHECK-S390X:  exp2l
+start:
+  %0 = tail call fp128 @llvm.exp2.f128(fp128 %a)
+  ret fp128 %0
+}
+
+
+define fp128 @test_expf128(fp128 %a) {
+; CHECK-LABEL:  test_expf128:
+; CHECK-NOTLD:  expf128
+; CHECK-USELD:  expl
+; CHECK-S390X:  expl
+start:
+  %0 = tail call fp128 @llvm.exp.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_fabsf128(fp128 %a) {
+; fabs should always get lowered to assembly
+; CHECK-LABEL:  test_fabsf128:
+; CHECK-NOT:fabsl
+; CHECK-NOT:fabsf128
+start:
+  %0 = tail call fp128 @llvm.fabs.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_floorf128(fp128 %a) {
+; CHECK-LABEL:  test

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2025-04-27 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From dafb3a378876db6b3bf505b425b386fd8f79c918 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/5] [IR] Add a test for `f128` libcall lowering (NFC)

`f128` intrinsic functions sometimes lower to `long double` library
calls when they instead need to be `f128` versions. Add a test
demonstrating current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 328 ++
 1 file changed, 328 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 0..dfbd1eaeda109
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,328 @@
+; RUN: llc < %s -mtriple=aarch64-unknown-none -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=i686-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=s390x-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-musl -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-USELD
+;
+; REQUIRES: aarch64-registered-target
+; REQUIRES: riscv-registered-target
+; REQUIRES: systemz-registered-target
+; REQUIRES: x86-registered-target
+;
+; Verify that fp128 intrinsics only lower to `long double` calls (e.g. `sinl`)
+; on platforms where `f128` and `long double` have the same layout, and
+; otherwise lower to `f128` versions (e.g. `sinf128`).
+;
+; Targets include:
+; * x86, x64 (80-bit long double)
+; * aarch64 (long double == f128)
+; * riscv32 (long double == f64)
+; * s390x (long double == f128, hardware support)
+; * A couple assorted environments for x86
+;
+; FIXME: only targets where long double is `f128` should be using `USELD`, all
+; others need to be NOTLD. PowerPC should be added but it currently emits an
+; interesting blend of both (e.g. `acosl` but `ceilf128`).
+
+define fp128 @test_acosf128(fp128 %a) {
+; CHECK-LABEL:  test_acosf128:
+; CHECK-NOTLD:  acosf128
+; CHECK-USELD:  acosl
+; CHECK-S390X:  acosl
+start:
+  %0 = tail call fp128 @llvm.acos.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_asinf128(fp128 %a) {
+; CHECK-LABEL:  test_asinf128:
+; CHECK-NOTLD:  asinf128
+; CHECK-USELD:  asinl
+; CHECK-S390X:  asinl
+start:
+  %0 = tail call fp128 @llvm.asin.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_atanf128(fp128 %a) {
+; CHECK-LABEL:  test_atanf128:
+; CHECK-NOTLD:  atanf128
+; CHECK-USELD:  atanl
+; CHECK-S390X:  atanl
+start:
+  %0 = tail call fp128 @llvm.atan.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-NOTLD:  ceilf128
+; CHECK-USELD:  ceill
+; CHECK-S390X:  ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; copysign should always get lowered to assembly
+; CHECK-LABEL:  test_copysignf128:
+; CHECK-NOT:copysignl
+; CHECK-NOT:copysignf128
+start:
+  %0 = tail call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
+  ret fp128 %0
+}
+
+define fp128 @test_cosf128(fp128 %a) {
+; CHECK-LABEL:  test_cosf128:
+; CHECK-NOTLD:  cosf128
+; CHECK-USELD:  cosl
+; CHECK-S390X:  cosl
+start:
+  %0 = tail call fp128 @llvm.cos.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_exp10f128(fp128 %a) {
+; CHECK-LABEL:  test_exp10f128:
+; CHECK-NOTLD:  exp10f128
+; CHECK-USELD:  exp10l
+; CHECK-S390X:  exp10l
+start:
+  %0 = tail call fp128 @llvm.exp10.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_exp2f128(fp128 %a) {
+; CHECK-LABEL:  test_exp2f128:
+; CHECK-NOTLD:  exp2f128
+; CHECK-USELD:  exp2l
+; CHECK-S390X:  exp2l
+start:
+  %0 = tail call fp128 @llvm.exp2.f128(fp128 %a)
+  ret fp128 %0
+}
+
+
+define fp128 @test_expf128(fp128 %a) {
+; CHECK-LABEL:  test_expf128:
+; CHECK-NOTLD:  expf128
+; CHECK-USELD:  expl
+; CHECK-S390X:  expl
+start:
+  %0 = tail call fp128 @llvm.exp.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_fabsf128(fp128 %a) {
+; fabs should always get lowered to assembly
+; CHECK-LABEL:  test_fabsf128:
+; CHECK-NOT:fabsl
+; CHECK-NOT:fabsf128
+start:
+  %0 = tail call fp128 @llvm.fabs.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_floorf128(fp128 %a) {
+; CHECK-LABEL:  test

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2025-04-27 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From dafb3a378876db6b3bf505b425b386fd8f79c918 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add a test for `f128` libcall lowering (NFC)

`f128` intrinsic functions sometimes lower to `long double` library
calls when they instead need to be `f128` versions. Add a test
demonstrating current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 328 ++
 1 file changed, 328 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 0..dfbd1eaeda109
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,328 @@
+; RUN: llc < %s -mtriple=aarch64-unknown-none -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=i686-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=s390x-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-musl -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-USELD
+;
+; REQUIRES: aarch64-registered-target
+; REQUIRES: riscv-registered-target
+; REQUIRES: systemz-registered-target
+; REQUIRES: x86-registered-target
+;
+; Verify that fp128 intrinsics only lower to `long double` calls (e.g. `sinl`)
+; on platforms where `f128` and `long double` have the same layout, and
+; otherwise lower to `f128` versions (e.g. `sinf128`).
+;
+; Targets include:
+; * x86, x64 (80-bit long double)
+; * aarch64 (long double == f128)
+; * riscv32 (long double == f64)
+; * s390x (long double == f128, hardware support)
+; * A couple assorted environments for x86
+;
+; FIXME: only targets where long double is `f128` should be using `USELD`, all
+; others need to be NOTLD. PowerPC should be added but it currently emits an
+; interesting blend of both (e.g. `acosl` but `ceilf128`).
+
+define fp128 @test_acosf128(fp128 %a) {
+; CHECK-LABEL:  test_acosf128:
+; CHECK-NOTLD:  acosf128
+; CHECK-USELD:  acosl
+; CHECK-S390X:  acosl
+start:
+  %0 = tail call fp128 @llvm.acos.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_asinf128(fp128 %a) {
+; CHECK-LABEL:  test_asinf128:
+; CHECK-NOTLD:  asinf128
+; CHECK-USELD:  asinl
+; CHECK-S390X:  asinl
+start:
+  %0 = tail call fp128 @llvm.asin.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_atanf128(fp128 %a) {
+; CHECK-LABEL:  test_atanf128:
+; CHECK-NOTLD:  atanf128
+; CHECK-USELD:  atanl
+; CHECK-S390X:  atanl
+start:
+  %0 = tail call fp128 @llvm.atan.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-NOTLD:  ceilf128
+; CHECK-USELD:  ceill
+; CHECK-S390X:  ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; copysign should always get lowered to assembly
+; CHECK-LABEL:  test_copysignf128:
+; CHECK-NOT:copysignl
+; CHECK-NOT:copysignf128
+start:
+  %0 = tail call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
+  ret fp128 %0
+}
+
+define fp128 @test_cosf128(fp128 %a) {
+; CHECK-LABEL:  test_cosf128:
+; CHECK-NOTLD:  cosf128
+; CHECK-USELD:  cosl
+; CHECK-S390X:  cosl
+start:
+  %0 = tail call fp128 @llvm.cos.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_exp10f128(fp128 %a) {
+; CHECK-LABEL:  test_exp10f128:
+; CHECK-NOTLD:  exp10f128
+; CHECK-USELD:  exp10l
+; CHECK-S390X:  exp10l
+start:
+  %0 = tail call fp128 @llvm.exp10.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_exp2f128(fp128 %a) {
+; CHECK-LABEL:  test_exp2f128:
+; CHECK-NOTLD:  exp2f128
+; CHECK-USELD:  exp2l
+; CHECK-S390X:  exp2l
+start:
+  %0 = tail call fp128 @llvm.exp2.f128(fp128 %a)
+  ret fp128 %0
+}
+
+
+define fp128 @test_expf128(fp128 %a) {
+; CHECK-LABEL:  test_expf128:
+; CHECK-NOTLD:  expf128
+; CHECK-USELD:  expl
+; CHECK-S390X:  expl
+start:
+  %0 = tail call fp128 @llvm.exp.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_fabsf128(fp128 %a) {
+; fabs should always get lowered to assembly
+; CHECK-LABEL:  test_fabsf128:
+; CHECK-NOT:fabsl
+; CHECK-NOT:fabsf128
+start:
+  %0 = tail call fp128 @llvm.fabs.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_floorf128(fp128 %a) {
+; CHECK-LABEL:  test

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2025-04-26 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From dafb3a378876db6b3bf505b425b386fd8f79c918 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/3] [IR] Add a test for `f128` libcall lowering (NFC)

`f128` intrinsic functions sometimes lower to `long double` library
calls when they instead need to be `f128` versions. Add a test
demonstrating current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 328 ++
 1 file changed, 328 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 0..dfbd1eaeda109
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,328 @@
+; RUN: llc < %s -mtriple=aarch64-unknown-none -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=i686-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=s390x-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-musl -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-USELD
+;
+; REQUIRES: aarch64-registered-target
+; REQUIRES: riscv-registered-target
+; REQUIRES: systemz-registered-target
+; REQUIRES: x86-registered-target
+;
+; Verify that fp128 intrinsics only lower to `long double` calls (e.g. `sinl`)
+; on platforms where `f128` and `long double` have the same layout, and
+; otherwise lower to `f128` versions (e.g. `sinf128`).
+;
+; Targets include:
+; * x86, x64 (80-bit long double)
+; * aarch64 (long double == f128)
+; * riscv32 (long double == f64)
+; * s390x (long double == f128, hardware support)
+; * A couple assorted environments for x86
+;
+; FIXME: only targets where long double is `f128` should be using `USELD`, all
+; others need to be NOTLD. PowerPC should be added but it currently emits an
+; interesting blend of both (e.g. `acosl` but `ceilf128`).
+
+define fp128 @test_acosf128(fp128 %a) {
+; CHECK-LABEL:  test_acosf128:
+; CHECK-NOTLD:  acosf128
+; CHECK-USELD:  acosl
+; CHECK-S390X:  acosl
+start:
+  %0 = tail call fp128 @llvm.acos.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_asinf128(fp128 %a) {
+; CHECK-LABEL:  test_asinf128:
+; CHECK-NOTLD:  asinf128
+; CHECK-USELD:  asinl
+; CHECK-S390X:  asinl
+start:
+  %0 = tail call fp128 @llvm.asin.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_atanf128(fp128 %a) {
+; CHECK-LABEL:  test_atanf128:
+; CHECK-NOTLD:  atanf128
+; CHECK-USELD:  atanl
+; CHECK-S390X:  atanl
+start:
+  %0 = tail call fp128 @llvm.atan.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-NOTLD:  ceilf128
+; CHECK-USELD:  ceill
+; CHECK-S390X:  ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; copysign should always get lowered to assembly
+; CHECK-LABEL:  test_copysignf128:
+; CHECK-NOT:copysignl
+; CHECK-NOT:copysignf128
+start:
+  %0 = tail call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
+  ret fp128 %0
+}
+
+define fp128 @test_cosf128(fp128 %a) {
+; CHECK-LABEL:  test_cosf128:
+; CHECK-NOTLD:  cosf128
+; CHECK-USELD:  cosl
+; CHECK-S390X:  cosl
+start:
+  %0 = tail call fp128 @llvm.cos.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_exp10f128(fp128 %a) {
+; CHECK-LABEL:  test_exp10f128:
+; CHECK-NOTLD:  exp10f128
+; CHECK-USELD:  exp10l
+; CHECK-S390X:  exp10l
+start:
+  %0 = tail call fp128 @llvm.exp10.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_exp2f128(fp128 %a) {
+; CHECK-LABEL:  test_exp2f128:
+; CHECK-NOTLD:  exp2f128
+; CHECK-USELD:  exp2l
+; CHECK-S390X:  exp2l
+start:
+  %0 = tail call fp128 @llvm.exp2.f128(fp128 %a)
+  ret fp128 %0
+}
+
+
+define fp128 @test_expf128(fp128 %a) {
+; CHECK-LABEL:  test_expf128:
+; CHECK-NOTLD:  expf128
+; CHECK-USELD:  expl
+; CHECK-S390X:  expl
+start:
+  %0 = tail call fp128 @llvm.exp.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_fabsf128(fp128 %a) {
+; fabs should always get lowered to assembly
+; CHECK-LABEL:  test_fabsf128:
+; CHECK-NOT:fabsl
+; CHECK-NOT:fabsf128
+start:
+  %0 = tail call fp128 @llvm.fabs.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_floorf128(fp128 %a) {
+; CHECK-LABEL:  test

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2025-04-26 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From 9089a966f2107a2dfcad86fec746e9f2bd8a105b Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/5] [IR] Add a test for `f128` libcall lowering (NFC)

`f128` intrinsic functions sometimes lower to `long double` library
calls when they instead need to be `f128` versions. Add a test
demonstrating current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 328 ++
 1 file changed, 328 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 0..dfbd1eaeda109
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,328 @@
+; RUN: llc < %s -mtriple=aarch64-unknown-none -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=i686-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=s390x-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-musl -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-USELD
+;
+; REQUIRES: aarch64-registered-target
+; REQUIRES: riscv-registered-target
+; REQUIRES: systemz-registered-target
+; REQUIRES: x86-registered-target
+;
+; Verify that fp128 intrinsics only lower to `long double` calls (e.g. `sinl`)
+; on platforms where `f128` and `long double` have the same layout, and
+; otherwise lower to `f128` versions (e.g. `sinf128`).
+;
+; Targets include:
+; * x86, x64 (80-bit long double)
+; * aarch64 (long double == f128)
+; * riscv32 (long double == f64)
+; * s390x (long double == f128, hardware support)
+; * A couple assorted environments for x86
+;
+; FIXME: only targets where long double is `f128` should be using `USELD`, all
+; others need to be NOTLD. PowerPC should be added but it currently emits an
+; interesting blend of both (e.g. `acosl` but `ceilf128`).
+
+define fp128 @test_acosf128(fp128 %a) {
+; CHECK-LABEL:  test_acosf128:
+; CHECK-NOTLD:  acosf128
+; CHECK-USELD:  acosl
+; CHECK-S390X:  acosl
+start:
+  %0 = tail call fp128 @llvm.acos.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_asinf128(fp128 %a) {
+; CHECK-LABEL:  test_asinf128:
+; CHECK-NOTLD:  asinf128
+; CHECK-USELD:  asinl
+; CHECK-S390X:  asinl
+start:
+  %0 = tail call fp128 @llvm.asin.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_atanf128(fp128 %a) {
+; CHECK-LABEL:  test_atanf128:
+; CHECK-NOTLD:  atanf128
+; CHECK-USELD:  atanl
+; CHECK-S390X:  atanl
+start:
+  %0 = tail call fp128 @llvm.atan.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-NOTLD:  ceilf128
+; CHECK-USELD:  ceill
+; CHECK-S390X:  ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; copysign should always get lowered to assembly
+; CHECK-LABEL:  test_copysignf128:
+; CHECK-NOT:copysignl
+; CHECK-NOT:copysignf128
+start:
+  %0 = tail call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
+  ret fp128 %0
+}
+
+define fp128 @test_cosf128(fp128 %a) {
+; CHECK-LABEL:  test_cosf128:
+; CHECK-NOTLD:  cosf128
+; CHECK-USELD:  cosl
+; CHECK-S390X:  cosl
+start:
+  %0 = tail call fp128 @llvm.cos.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_exp10f128(fp128 %a) {
+; CHECK-LABEL:  test_exp10f128:
+; CHECK-NOTLD:  exp10f128
+; CHECK-USELD:  exp10l
+; CHECK-S390X:  exp10l
+start:
+  %0 = tail call fp128 @llvm.exp10.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_exp2f128(fp128 %a) {
+; CHECK-LABEL:  test_exp2f128:
+; CHECK-NOTLD:  exp2f128
+; CHECK-USELD:  exp2l
+; CHECK-S390X:  exp2l
+start:
+  %0 = tail call fp128 @llvm.exp2.f128(fp128 %a)
+  ret fp128 %0
+}
+
+
+define fp128 @test_expf128(fp128 %a) {
+; CHECK-LABEL:  test_expf128:
+; CHECK-NOTLD:  expf128
+; CHECK-USELD:  expl
+; CHECK-S390X:  expl
+start:
+  %0 = tail call fp128 @llvm.exp.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_fabsf128(fp128 %a) {
+; fabs should always get lowered to assembly
+; CHECK-LABEL:  test_fabsf128:
+; CHECK-NOT:fabsl
+; CHECK-NOT:fabsf128
+start:
+  %0 = tail call fp128 @llvm.fabs.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_floorf128(fp128 %a) {
+; CHECK-LABEL:  test

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2025-04-21 Thread Trevor Gross via cfe-commits

tgross35 wrote:

Talked to arsenm on discord, long discussion starting around here 
https://discord.com/channels/636084430946959380/636732535434510338/1362207130559578185.
 The outcome is that this is effectively a target option and needs to be tied 
to the triple rather than set per-module, which makes sense and avoids the 
above problem.

So, I'll be doing the following:

1. Make LLVM assume that `sqrtf128` (and similar) libcalls are available by 
default
2. On 64-bit arm, loongarch, mips, risc-v, and s390x musl targets, use `sqrtl` 
instead
3. Add some way to make musl targets also use `sqrtf128`, like `-nolongdouble` 
in the target triple

This should work because calling `sqrtf128` is correct on _most_ platforms:
* On Windows, Apple, and 32-bit platforms, `long double` is `f64` so `sqrtf128` 
is the only correct call
* On x86, `long double` is the x87 80-bit float so `sqrtf128` is the only 
correct call
* On anything glibc, `sqrtf128` is an alias to `sqrtl` on platforms where that 
works, so `sqrtf128` can always be called
* That leaves 64-bit `musl` on platforms where `long double` is `f128` as the 
only platforms where `sqrtl` has to be called (otherwise calling `sqrtl` from C 
would get intercepted and relowered as `sqrtf128` for a linker error)

https://github.com/llvm/llvm-project/pull/76558
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2025-04-16 Thread Trevor Gross via cfe-commits


@@ -0,0 +1,331 @@
+; RUN: llc < %s -mtriple=aarch64-unknown-none -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=i686-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=powerpc-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=powerpc64-unknown -verify-machineinstrs | FileCheck 
%s --check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=s390x-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-musl -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-NOTLD
+;
+; REQUIRES: aarch64-registered-target
+; REQUIRES: powerpc-registered-target
+; REQUIRES: riscv-registered-target
+; REQUIRES: systemz-registered-target
+; REQUIRES: x86-registered-target

tgross35 wrote:

Todo: replace this with `%if` somehow so this test still runs if only a subset 
of architectures is available https://llvm.org/docs/TestingGuide.html

https://github.com/llvm/llvm-project/pull/76558
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2025-03-01 Thread Trevor Gross via cfe-commits

tgross35 wrote:

In either case, I need to have the module flags available pretty early and I'm 
not sure how to do that. Ideally they would be available when `TargetLowering` 
is constructed or sometime before it is used for lowering, but it only gets a 
`TargetMachine` as a parameter. All values in `TargetOptions` seem to be 
configured once and don't pay attention to module flags or take the module as a 
parameter - is there a reason for that? I'm wondering if `TargetMachine` is 
intended to be unchanging across different modules.

https://github.com/llvm/llvm-project/pull/76558
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2025-02-28 Thread Trevor Gross via cfe-commits

tgross35 wrote:

Finally getting around to this after more than a year. @efriedma-quic as an 
alternative to the current implementation of duplicating `long double` layout 
information from Clang to LLVM, would it work if LLVM lowers to `*f128` calls 
but provides a module flag `fp128_use_long_double_libcalls` to prefer the `*l` 
versions? So if Clang or other frontends know that their `long double` is 
`_Float128`, it can select those libcalls.

The advantages are that code duplication is avoided and the logic is easier to follow. 
Also this avoids problems if linking a library built with an unexpected 
`-mlong-double-` configuration.

The disadvantage is that frontends that don't know about C's `long double` 
can't benefit from the more common `*l` symbols. I don't think this is too big 
of a problem though: it makes no difference with glibc (the f128 aliases have 
been around sufficiently long) or on any platforms where `long double` is not 
`_Float128`. And it is easy enough for frontends to set 
`fp128_use_long_double_libcalls` on a case-by-case basis if they know what math 
library is being used (e.g. aarch64 musl).

(I handle the f128 support for Rust and would much rather never think about 
`*l` symbols, I can alias them to `*f128` if needed or set the flag)

https://github.com/llvm/llvm-project/pull/76558
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2025-02-27 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From 31405591b5661156348ec7a45e66eb43e0ace15b Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/6] [IR] Add a test for `f128` libcall lowering (nfc)

`f128` intrinsic functions sometimes lower to `long double` library
calls when they instead need to be `f128` versions. Add a test
demonstrating current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 328 ++
 1 file changed, 328 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 0..dfbd1eaeda109
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,328 @@
+; RUN: llc < %s -mtriple=aarch64-unknown-none -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=i686-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=s390x-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-USELD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-NOTLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-musl -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-USELD
+;
+; REQUIRES: aarch64-registered-target
+; REQUIRES: riscv-registered-target
+; REQUIRES: systemz-registered-target
+; REQUIRES: x86-registered-target
+;
+; Verify that fp128 intrinsics only lower to `long double` calls (e.g. `sinl`)
+; on platforms where `f128` and `long double` have the same layout, and
+; otherwise lower to `f128` versions (e.g. `sinf128`).
+;
+; Targets include:
+; * x86, x64 (80-bit long double)
+; * aarch64 (long double == f128)
+; * riscv32 (long double == f64)
+; * s390x (long double == f128, hardware support)
+; * A couple assorted environments for x86
+;
+; FIXME: only targets where long double is `f128` should be using `USELD`, all
+; others need to be NOTLD. PowerPC should be added but it currently emits an
+; interesting blend of both (e.g. `acosl` but `ceilf128`).
+
+define fp128 @test_acosf128(fp128 %a) {
+; CHECK-LABEL:  test_acosf128:
+; CHECK-NOTLD:  acosf128
+; CHECK-USELD:  acosl
+; CHECK-S390X:  acosl
+start:
+  %0 = tail call fp128 @llvm.acos.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_asinf128(fp128 %a) {
+; CHECK-LABEL:  test_asinf128:
+; CHECK-NOTLD:  asinf128
+; CHECK-USELD:  asinl
+; CHECK-S390X:  asinl
+start:
+  %0 = tail call fp128 @llvm.asin.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_atanf128(fp128 %a) {
+; CHECK-LABEL:  test_atanf128:
+; CHECK-NOTLD:  atanf128
+; CHECK-USELD:  atanl
+; CHECK-S390X:  atanl
+start:
+  %0 = tail call fp128 @llvm.atan.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-NOTLD:  ceilf128
+; CHECK-USELD:  ceill
+; CHECK-S390X:  ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; copysign should always get lowered to assembly
+; CHECK-LABEL:  test_copysignf128:
+; CHECK-NOT:copysignl
+; CHECK-NOT:copysignf128
+start:
+  %0 = tail call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
+  ret fp128 %0
+}
+
+define fp128 @test_cosf128(fp128 %a) {
+; CHECK-LABEL:  test_cosf128:
+; CHECK-NOTLD:  cosf128
+; CHECK-USELD:  cosl
+; CHECK-S390X:  cosl
+start:
+  %0 = tail call fp128 @llvm.cos.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_exp10f128(fp128 %a) {
+; CHECK-LABEL:  test_exp10f128:
+; CHECK-NOTLD:  exp10f128
+; CHECK-USELD:  exp10l
+; CHECK-S390X:  exp10l
+start:
+  %0 = tail call fp128 @llvm.exp10.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_exp2f128(fp128 %a) {
+; CHECK-LABEL:  test_exp2f128:
+; CHECK-NOTLD:  exp2f128
+; CHECK-USELD:  exp2l
+; CHECK-S390X:  exp2l
+start:
+  %0 = tail call fp128 @llvm.exp2.f128(fp128 %a)
+  ret fp128 %0
+}
+
+
+define fp128 @test_expf128(fp128 %a) {
+; CHECK-LABEL:  test_expf128:
+; CHECK-NOTLD:  expf128
+; CHECK-USELD:  expl
+; CHECK-S390X:  expl
+start:
+  %0 = tail call fp128 @llvm.exp.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_fabsf128(fp128 %a) {
+; fabs should always get lowered to assembly
+; CHECK-LABEL:  test_fabsf128:
+; CHECK-NOT:fabsl
+; CHECK-NOT:fabsf128
+start:
+  %0 = tail call fp128 @llvm.fabs.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_floorf128(fp128 %a) {
+; CHECK-LABEL:  test

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2025-02-26 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From f110337467d5a2b1f624eab507daa2bc854def17 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering
 (nfc)

`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 610 ++
 1 file changed, 610 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 0..8a70786d97fe6
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown   -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL:  test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X:  brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+  %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X:  brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64:   // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32:   # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X:   # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86:   # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2024-01-22 Thread Eli Friedman via cfe-commits

efriedma-quic wrote:

Putting a function in TargetMachine seems reasonable.

https://github.com/llvm/llvm-project/pull/76558
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2024-01-21 Thread Trevor Gross via cfe-commits

tgross35 wrote:

I'm struggling a bit with how to handle ABI information since that affects 
layout (e.g. ARM aapcs), which I think explains most of the errors in 
https://buildkite.com/llvm-project/github-pull-requests/builds/31198#018d26e2-fd17-4e15-a1eb-08580c189056.
 This needs to be available at TargetLoweringBase::InitLibcalls, which calls 
[`getCLayouts`](https://github.com/llvm/llvm-project/blob/cb3bf7540cf9b797575c625318e47a33f7514fad/llvm/lib/TargetParser/Triple.cpp#L1945).

TargetMachine is available at that time, so would it be better to move CLayouts 
from Triple to TargetMachine? If so subclasses could be used rather than the if 
block, which more closely follows the Clang side.

Also, are there currently any module flags that make it to TargetLowering? 
I'm looking for a reference on how to get the -mlong-double-128 information.

https://github.com/llvm/llvm-project/pull/76558
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2024-01-20 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From e869ad1bc601d95b6364dc5619e79a06e8b0fc82 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/5] [IR] Add an xpassing test for `f128` intrinsic lowering

`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 610 ++
 1 file changed, 610 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown   -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL:  test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X:  brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+  %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X:  brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64:   // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32:   # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X:   # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86:   # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2024-01-20 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From e869ad1bc601d95b6364dc5619e79a06e8b0fc82 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering

`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 610 ++
 1 file changed, 610 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown   -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL:  test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X:  brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+  %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X:  brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64:   // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32:   # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X:   # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86:   # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2024-01-20 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From 90a465d0a7e9744a4a8043152016e500927a0d95 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering

`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 610 ++
 1 file changed, 610 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown   -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL:  test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X:  brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+  %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X:  brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64:   // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32:   # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X:   # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86:   # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2024-01-13 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From 90a465d0a7e9744a4a8043152016e500927a0d95 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering

`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 610 ++
 1 file changed, 610 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown   -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL:  test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X:  brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+  %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X:  brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64:   // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32:   # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X:   # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86:   # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2024-01-13 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From 90a465d0a7e9744a4a8043152016e500927a0d95 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering

`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 610 ++
 1 file changed, 610 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown   -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL:  test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X:  brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+  %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X:  brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64:   // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32:   # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X:   # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86:   # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2024-01-13 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From 946581e0c6a06be92b16d74199b58a72be4b76f3 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering

`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 610 ++
 1 file changed, 610 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown   -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL:  test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X:  brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+  %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X:  brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64:   // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32:   # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X:   # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86:   # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2024-01-13 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From 590f4920ceb1a80d711d39624b0249cd9ff774d2 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering

`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 610 ++
 1 file changed, 610 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown   -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL:  test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X:  brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+  %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X:  brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64:   // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32:   # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X:   # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86:   # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2023-12-29 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From 590f4920ceb1a80d711d39624b0249cd9ff774d2 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering

`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 610 ++
 1 file changed, 610 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown   -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL:  test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X:  brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+  %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X:  brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64:   // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32:   # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X:   # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86:   # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2023-12-29 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From 590f4920ceb1a80d711d39624b0249cd9ff774d2 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering

`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 610 ++
 1 file changed, 610 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown   -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL:  test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X:  brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+  %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X:  brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64:   // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32:   # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X:   # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86:   # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2023-12-29 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From 590f4920ceb1a80d711d39624b0249cd9ff774d2 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering

`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 610 ++
 1 file changed, 610 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown   -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL:  test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X:  brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+  %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X:  brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64:   // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32:   # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X:   # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86:   # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2023-12-29 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From 590f4920ceb1a80d711d39624b0249cd9ff774d2 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering

`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 610 ++
 1 file changed, 610 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown   -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL:  test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X:  brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+  %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X:  brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64:   // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32:   # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X:   # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86:   # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2023-12-29 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From 590f4920ceb1a80d711d39624b0249cd9ff774d2 Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering

`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 610 ++
 1 file changed, 610 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..8a70786d97fe67
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 2
+;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown   -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL:  test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X:  brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+  %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X:  brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64:   // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32:   # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X:   # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86:   # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-N

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2023-12-29 Thread Trevor Gross via cfe-commits

https://github.com/tgross35 updated 
https://github.com/llvm/llvm-project/pull/76558

>From 7df4ef93989b1913d9200fbc29d6d04f9e59d51a Mon Sep 17 00:00:00 2001
From: Trevor Gross 
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 1/4] [IR] Add an xpassing test for `f128` intrinsic lowering

`f128` intrinsic functions lower to incorrect libc calls. Add a test
showing current behavior.
---
 .../CodeGen/Generic/f128-math-lowering.ll | 610 ++
 1 file changed, 610 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/f128-math-lowering.ll

diff --git a/llvm/test/CodeGen/Generic/f128-math-lowering.ll 
b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
new file mode 100644
index 00..30efb8ef34918e
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/f128-math-lowering.ll
@@ -0,0 +1,610 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 2
+;;
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-AARCH64
+; RUN: llc < %s -mtriple=riscv32-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-RISCV32
+; RUN: llc < %s -mtriple=s390x-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-S390X
+; RUN: llc < %s -mtriple=i686-unknown-unknown   -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-X64
+;
+; Verify that fp128 intrinsics only lower to `long double` calls on platforms
+; where `f128` and `long double` have the same layout.
+;
+; We test on x86 and x64 which have 80-bit ld, as well as aarch64 (ld == f128),
+; riscv32 (ld == f64), and s390x (ld == f128 with different alignment from
+; x64/aarch64 f128).
+;
+; FIXME: these emit calls to long double functions but should emit f128 calls
+
+define fp128 @test_cbrtf128(fp128 %a) {
+; CHECK-LABEL:  test_cbrtf128:
+; CHECK-AARCH64:b llvm.cbrt.f128
+; CHECK-RISCV32:call llvm.cbrt.f128@plt
+; CHECK-S390X:  brasl {{%.*}} llvm.cbrt.f128@PLT
+; CHECK-X64:jmp llvm.cbrt.f128@PLT # TAILCALL
+; CHECK-X86:calll llvm.cbrt.f128@PLT
+start:
+  %0 = tail call fp128 @llvm.cbrt.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.cbrt.f128(fp128)
+
+
+define fp128 @test_ceilf128(fp128 %a) {
+; CHECK-LABEL:  test_ceilf128:
+; CHECK-AARCH64:b ceill
+; CHECK-RISCV32:call ceill@plt
+; CHECK-S390X:  brasl {{%.*}} ceill@PLT
+; CHECK-X64:jmp ceill@PLT
+; CHECK-X86:calll ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.ceil.f128(fp128)
+
+
+define fp128 @test_copysignf128(fp128 %a, fp128 %b) {
+; No math library call here, so make sure the assembly does the correct thing.
+; This test is autogenerated
+; CHECK-LABEL:test_copysignf128:
+; CHECK-AARCH64-LABEL: test_copysignf128:
+; CHECK-AARCH64:   // %bb.0: // %start
+; CHECK-AARCH64-NEXT:stp q0, q1, [sp, #-32]!
+; CHECK-AARCH64-NEXT:.cfi_def_cfa_offset 32
+; CHECK-AARCH64-NEXT:ldrb w8, [sp, #15]
+; CHECK-AARCH64-NEXT:ldrb w9, [sp, #31]
+; CHECK-AARCH64-NEXT:bfxil w9, w8, #0, #7
+; CHECK-AARCH64-NEXT:strb w9, [sp, #15]
+; CHECK-AARCH64-NEXT:ldr q0, [sp], #32
+; CHECK-AARCH64-NEXT:ret
+;
+; CHECK-RISCV32-LABEL: test_copysignf128:
+; CHECK-RISCV32:   # %bb.0: # %start
+; CHECK-RISCV32-NEXT:lw a3, 0(a1)
+; CHECK-RISCV32-NEXT:lw a4, 4(a1)
+; CHECK-RISCV32-NEXT:lw a2, 12(a2)
+; CHECK-RISCV32-NEXT:lw a5, 12(a1)
+; CHECK-RISCV32-NEXT:lw a1, 8(a1)
+; CHECK-RISCV32-NEXT:lui a6, 524288
+; CHECK-RISCV32-NEXT:and a2, a2, a6
+; CHECK-RISCV32-NEXT:slli a5, a5, 1
+; CHECK-RISCV32-NEXT:srli a5, a5, 1
+; CHECK-RISCV32-NEXT:or a2, a5, a2
+; CHECK-RISCV32-NEXT:sw a1, 8(a0)
+; CHECK-RISCV32-NEXT:sw a4, 4(a0)
+; CHECK-RISCV32-NEXT:sw a3, 0(a0)
+; CHECK-RISCV32-NEXT:sw a2, 12(a0)
+; CHECK-RISCV32-NEXT:ret
+;
+; CHECK-S390X-LABEL: test_copysignf128:
+; CHECK-S390X:   # %bb.0: # %start
+; CHECK-S390X-NEXT:ld %f0, 0(%r3)
+; CHECK-S390X-NEXT:ld %f2, 8(%r3)
+; CHECK-S390X-NEXT:ld %f1, 0(%r4)
+; CHECK-S390X-NEXT:ld %f3, 8(%r4)
+; CHECK-S390X-NEXT:cpsdr %f0, %f1, %f0
+; CHECK-S390X-NEXT:std %f0, 0(%r2)
+; CHECK-S390X-NEXT:std %f2, 8(%r2)
+; CHECK-S390X-NEXT:br %r14
+;
+; CHECK-X86-LABEL: test_copysignf128:
+; CHECK-X86:   # %bb.0: # %start
+; CHECK-X86-NEXT:pushl %ebx
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:pushl %edi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:pushl %esi
+; CHECK-X86-NEXT:.cfi_def_cfa_offset 16
+; CHECK-X86-NEXT:.cfi_offset %esi, -16
+; CHECK-X86-NEXT:.cfi_offset %edi, -12
+; CHECK-X86-NEXT:.cfi_offset %ebx, -8
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-

[clang] [llvm] [WIP] Correct lowering of `fp128` intrinsics (PR #76558)

2023-12-29 Thread Trevor Gross via cfe-commits

tgross35 wrote:

@efriedma-quic was looking at this on phabricator

https://github.com/llvm/llvm-project/pull/76558
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits