[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
karka228 wrote: > LGTM. Thanks. I will close the issue #87758. Thanks @andykaylor and @zahiraam for reviewing. https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
https://github.com/karka228 closed https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
https://github.com/zahiraam approved this pull request. LGTM. Thanks. https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
https://github.com/karka228 updated https://github.com/llvm/llvm-project/pull/90377 >From 80836f51573c8ba474a0b5d5890a489d5f0c7034 Mon Sep 17 00:00:00 2001 From: Karl-Johan Karlsson Date: Sat, 27 Apr 2024 22:04:11 +0200 Subject: [PATCH 1/6] Pre-commit reproducer for fast-math flags on calls Adding test case related to issue #87758 The testcase show the faulty behavior where the calls to llvm.pow.f32 and llvm.fma.f32 are not attributed with the "fast" flag. --- clang/test/CodeGen/pr87758.c | 29 + 1 file changed, 29 insertions(+) create mode 100644 clang/test/CodeGen/pr87758.c diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c new file mode 100644 index 00..5c5a6de442c2ac --- /dev/null +++ b/clang/test/CodeGen/pr87758.c @@ -0,0 +1,29 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK %s + +// FIXME: Reproducer for issue #87758 +// The testcase below show the faulty behavior where the calls to +// llvm.pow.f32 and llvm.fma.f32 are not attributed with the "fast" flag. 
+ +float sqrtf(float x); +float powf(float x, float y); +float fmaf(float x, float y, float z); + +#pragma float_control(push) +#pragma float_control(precise, off) +// CHECK-LABEL: define dso_local float @fp_precise_libm_calls( +// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[A]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call float @llvm.pow.f32(float [[TMP0]], float [[B]]) +// CHECK-NEXT:[[TMP2:%.*]] = tail call float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]]) +// CHECK-NEXT:ret float [[TMP2]] +// +float fp_precise_libm_calls(float a, float b, float c) { + a = sqrtf(a); + a = powf(a,b); + a = fmaf(a,b,c); + return a; +} +#pragma float_control(pop) >From 4af840da9ec863f6372083f5e96998fb07db6b9c Mon Sep 17 00:00:00 2001 From: Karl-Johan Karlsson Date: Sat, 27 Apr 2024 22:37:34 +0200 Subject: [PATCH 2/6] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins This is a fix for the issue #87758 where fast-math flags are not propagated all builtins. It seems like pragmas with fast math flags was only propagated to calls of unary floating point builtins. This patch propagate them also for binary and ternary floating point builtins. 
--- clang/lib/CodeGen/CGBuiltin.cpp | 6 +++--- clang/test/CodeGen/pr87758.c| 9 - 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d08ab539148914..d61a63ac61572d 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -513,8 +513,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction , llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 }); } else { @@ -530,8 +530,8 @@ static Value *emitBinaryExpMaybeConstrainedFPBuiltin( llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, {Src0->getType(), Src1->getType()}); return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1}); @@ -551,8 +551,8 @@ static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction , llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 }); } else { diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c index 5c5a6de442c2ac..05b3232986e0a6 100644 --- a/clang/test/CodeGen/pr87758.c +++ b/clang/test/CodeGen/pr87758.c 
@@ -2,9 +2,8 @@ // RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ // RUN: | FileCheck -check-prefix=CHECK %s -// FIXME: Reproducer for issue #87758 -// The testcase below show the faulty behavior where the calls to -// llvm.pow.f32
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
@@ -0,0 +1,76 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s + +// precise mode +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fmath-errno -ffp-contract=on \ +// RUN: -fno-rounding-math -emit-llvm -o - %s | FileCheck \ +// RUN: --check-prefix=CHECK-PRECISE %s + +// fast mode +// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffast-math -ffp-contract=fast \ +// RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-FAST %s + +// Reproducer for issue #87758 +// The testcase below verifies that the "fast" flag are set on the calls. + +float sqrtf(float x); // unary fp builtin +float powf(float x, float y); // binary fp builtin +float fmaf(float x, float y, float z); // ternary fp builtin +char *rindex(const char *s, int c); // not a fp builtin + +#pragma float_control(push) +#pragma float_control(precise, off) +// CHECK: define dso_local float @fp_precise_off_libm_calls( +// CHECK: %{{.*}} = call fast float @llvm.sqrt.f32( karka228 wrote: I guess not any longer. I'll change the testcase. https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
@@ -0,0 +1,76 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s + +// precise mode +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fmath-errno -ffp-contract=on \ +// RUN: -fno-rounding-math -emit-llvm -o - %s | FileCheck \ +// RUN: --check-prefix=CHECK-PRECISE %s + +// fast mode +// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffast-math -ffp-contract=fast \ +// RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-FAST %s + +// Reproducer for issue #87758 +// The testcase below verifies that the "fast" flag are set on the calls. + +float sqrtf(float x); // unary fp builtin +float powf(float x, float y); // binary fp builtin +float fmaf(float x, float y, float z); // ternary fp builtin +char *rindex(const char *s, int c); // not a fp builtin + +#pragma float_control(push) +#pragma float_control(precise, off) +// CHECK: define dso_local float @fp_precise_off_libm_calls( +// CHECK: %{{.*}} = call fast float @llvm.sqrt.f32( zahiraam wrote: Do we need the %{{.*}} = before these calls? https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
@@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -fmath-errno -ffp-contract=on -fno-rounding-math -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffast-math -ffp-contract=fast -emit-llvm -o - %s | FileCheck %s +// RUN: %clang -O3 -S -emit-llvm -Xclang -disable-llvm-passes %s -o - | FileCheck %s +// RUN: %clang -O3 -ffp-model=fast -S -emit-llvm -Xclang -disable-llvm-passes %s -o - | FileCheck %s +// RUN: %clang -O3 -ffp-model=precise -S -emit-llvm -Xclang -disable-llvm-passes %s -o - | FileCheck %s + +// Reproducer for issue #87758 +// The testcase below verifies that the "fast" flag are set on the calls. + karka228 wrote: Sure, I'll update the testcase with only those run lines. https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
https://github.com/karka228 updated https://github.com/llvm/llvm-project/pull/90377 >From 80836f51573c8ba474a0b5d5890a489d5f0c7034 Mon Sep 17 00:00:00 2001 From: Karl-Johan Karlsson Date: Sat, 27 Apr 2024 22:04:11 +0200 Subject: [PATCH 1/5] Pre-commit reproducer for fast-math flags on calls Adding test case related to issue #87758 The testcase show the faulty behavior where the calls to llvm.pow.f32 and llvm.fma.f32 are not attributed with the "fast" flag. --- clang/test/CodeGen/pr87758.c | 29 + 1 file changed, 29 insertions(+) create mode 100644 clang/test/CodeGen/pr87758.c diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c new file mode 100644 index 00..5c5a6de442c2ac --- /dev/null +++ b/clang/test/CodeGen/pr87758.c @@ -0,0 +1,29 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK %s + +// FIXME: Reproducer for issue #87758 +// The testcase below show the faulty behavior where the calls to +// llvm.pow.f32 and llvm.fma.f32 are not attributed with the "fast" flag. 
+ +float sqrtf(float x); +float powf(float x, float y); +float fmaf(float x, float y, float z); + +#pragma float_control(push) +#pragma float_control(precise, off) +// CHECK-LABEL: define dso_local float @fp_precise_libm_calls( +// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[A]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call float @llvm.pow.f32(float [[TMP0]], float [[B]]) +// CHECK-NEXT:[[TMP2:%.*]] = tail call float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]]) +// CHECK-NEXT:ret float [[TMP2]] +// +float fp_precise_libm_calls(float a, float b, float c) { + a = sqrtf(a); + a = powf(a,b); + a = fmaf(a,b,c); + return a; +} +#pragma float_control(pop) >From 4af840da9ec863f6372083f5e96998fb07db6b9c Mon Sep 17 00:00:00 2001 From: Karl-Johan Karlsson Date: Sat, 27 Apr 2024 22:37:34 +0200 Subject: [PATCH 2/5] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins This is a fix for the issue #87758 where fast-math flags are not propagated all builtins. It seems like pragmas with fast math flags was only propagated to calls of unary floating point builtins. This patch propagate them also for binary and ternary floating point builtins. 
--- clang/lib/CodeGen/CGBuiltin.cpp | 6 +++--- clang/test/CodeGen/pr87758.c| 9 - 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d08ab539148914..d61a63ac61572d 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -513,8 +513,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction , llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 }); } else { @@ -530,8 +530,8 @@ static Value *emitBinaryExpMaybeConstrainedFPBuiltin( llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, {Src0->getType(), Src1->getType()}); return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1}); @@ -551,8 +551,8 @@ static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction , llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 }); } else { diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c index 5c5a6de442c2ac..05b3232986e0a6 100644 --- a/clang/test/CodeGen/pr87758.c +++ b/clang/test/CodeGen/pr87758.c 
@@ -2,9 +2,8 @@ // RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ // RUN: | FileCheck -check-prefix=CHECK %s -// FIXME: Reproducer for issue #87758 -// The testcase below show the faulty behavior where the calls to -// llvm.pow.f32
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
@@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -fmath-errno -ffp-contract=on -fno-rounding-math -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffast-math -ffp-contract=fast -emit-llvm -o - %s | FileCheck %s zahiraam wrote: This is equivalent to line 8. https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
@@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -fmath-errno -ffp-contract=on -fno-rounding-math -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s zahiraam wrote: This is equivalent to line 7. https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
@@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -fmath-errno -ffp-contract=on -fno-rounding-math -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffast-math -ffp-contract=fast -emit-llvm -o - %s | FileCheck %s +// RUN: %clang -O3 -S -emit-llvm -Xclang -disable-llvm-passes %s -o - | FileCheck %s +// RUN: %clang -O3 -ffp-model=fast -S -emit-llvm -Xclang -disable-llvm-passes %s -o - | FileCheck %s +// RUN: %clang -O3 -ffp-model=precise -S -emit-llvm -Xclang -disable-llvm-passes %s -o - | FileCheck %s + +// Reproducer for issue #87758 +// The testcase below verifies that the "fast" flag are set on the calls. + zahiraam wrote: How about using these run lines? `// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s` `// precise mode` `// RUN: %clang_cc1 -triple x86_64-linux-gnu -fmath-errno -ffp-contract=on \` `// RUN: -fno-rounding-math -emit-llvm -o - %s | FileCheck %s` `// fast mode` `// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffast-math -ffp-contract=fast \` `// RUN: -emit-llvm -o - %s | FileCheck %s` You might need to use different prefixes. https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
@@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK %s + +// Reproducer for issue #87758 +// The testcase below verifies that the "fast" flag are set on the calls. + +float sqrtf(float x); +float powf(float x, float y); +float fmaf(float x, float y, float z); + +#pragma float_control(push) +#pragma float_control(precise, off) +// CHECK-LABEL: define dso_local float @fp_precise_libm_calls( +// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[A]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call fast float @llvm.pow.f32(float [[TMP0]], float [[B]]) +// CHECK-NEXT:[[TMP2:%.*]] = tail call fast float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]]) +// CHECK-NEXT:ret float [[TMP2]] +// +float fp_precise_libm_calls(float a, float b, float c) { + a = sqrtf(a); + a = powf(a,b); + a = fmaf(a,b,c); + return a; +} +#pragma float_control(pop) karka228 wrote: I updated the testcase with more RUN: lines and also added a libc call to rindex, which has nothing to do with floats, to verify that it never gets the "fast" flag. https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
https://github.com/karka228 updated https://github.com/llvm/llvm-project/pull/90377 >From 80836f51573c8ba474a0b5d5890a489d5f0c7034 Mon Sep 17 00:00:00 2001 From: Karl-Johan Karlsson Date: Sat, 27 Apr 2024 22:04:11 +0200 Subject: [PATCH 1/4] Pre-commit reproducer for fast-math flags on calls Adding test case related to issue #87758 The testcase show the faulty behavior where the calls to llvm.pow.f32 and llvm.fma.f32 are not attributed with the "fast" flag. --- clang/test/CodeGen/pr87758.c | 29 + 1 file changed, 29 insertions(+) create mode 100644 clang/test/CodeGen/pr87758.c diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c new file mode 100644 index 00..5c5a6de442c2ac --- /dev/null +++ b/clang/test/CodeGen/pr87758.c @@ -0,0 +1,29 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK %s + +// FIXME: Reproducer for issue #87758 +// The testcase below show the faulty behavior where the calls to +// llvm.pow.f32 and llvm.fma.f32 are not attributed with the "fast" flag. 
+ +float sqrtf(float x); +float powf(float x, float y); +float fmaf(float x, float y, float z); + +#pragma float_control(push) +#pragma float_control(precise, off) +// CHECK-LABEL: define dso_local float @fp_precise_libm_calls( +// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[A]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call float @llvm.pow.f32(float [[TMP0]], float [[B]]) +// CHECK-NEXT:[[TMP2:%.*]] = tail call float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]]) +// CHECK-NEXT:ret float [[TMP2]] +// +float fp_precise_libm_calls(float a, float b, float c) { + a = sqrtf(a); + a = powf(a,b); + a = fmaf(a,b,c); + return a; +} +#pragma float_control(pop) >From 4af840da9ec863f6372083f5e96998fb07db6b9c Mon Sep 17 00:00:00 2001 From: Karl-Johan Karlsson Date: Sat, 27 Apr 2024 22:37:34 +0200 Subject: [PATCH 2/4] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins This is a fix for the issue #87758 where fast-math flags are not propagated all builtins. It seems like pragmas with fast math flags was only propagated to calls of unary floating point builtins. This patch propagate them also for binary and ternary floating point builtins. 
--- clang/lib/CodeGen/CGBuiltin.cpp | 6 +++--- clang/test/CodeGen/pr87758.c| 9 - 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d08ab539148914..d61a63ac61572d 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -513,8 +513,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction , llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 }); } else { @@ -530,8 +530,8 @@ static Value *emitBinaryExpMaybeConstrainedFPBuiltin( llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, {Src0->getType(), Src1->getType()}); return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1}); @@ -551,8 +551,8 @@ static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction , llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 }); } else { diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c index 5c5a6de442c2ac..05b3232986e0a6 100644 --- a/clang/test/CodeGen/pr87758.c +++ b/clang/test/CodeGen/pr87758.c 
@@ -2,9 +2,8 @@ // RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ // RUN: | FileCheck -check-prefix=CHECK %s -// FIXME: Reproducer for issue #87758 -// The testcase below show the faulty behavior where the calls to -// llvm.pow.f32
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
@@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK %s + +// Reproducer for issue #87758 +// The testcase below verifies that the "fast" flag are set on the calls. + +float sqrtf(float x); +float powf(float x, float y); +float fmaf(float x, float y, float z); + +#pragma float_control(push) +#pragma float_control(precise, off) +// CHECK-LABEL: define dso_local float @fp_precise_libm_calls( +// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[A]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call fast float @llvm.pow.f32(float [[TMP0]], float [[B]]) +// CHECK-NEXT:[[TMP2:%.*]] = tail call fast float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]]) +// CHECK-NEXT:ret float [[TMP2]] +// +float fp_precise_libm_calls(float a, float b, float c) { + a = sqrtf(a); + a = powf(a,b); + a = fmaf(a,b,c); + return a; +} +#pragma float_control(pop) zahiraam wrote: I tried the patch with your change in `emitLibraryCall() ` and it seems to be fixing at least one known issue. For the code here (using your 3 functions from this PR): https://godbolt.org/z/fzsGce76x We can see that with `-ffp-model=fast` all function calls in `foof` function are correctly marked with the `fast` flag, but in `foop` they are incorrectly marked with the `fast` flag. When compiled with `ffp-model=precise` some function calls are missing the `fast` flag in `foof`. With your patch all these issues are corrected. Please confirm that. And if it's the case, then I think you can keep your last change and add more testing. 
https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
https://github.com/zahiraam deleted https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
@@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ zahiraam wrote: You don't need to put every // CHECK line. Since we are interested only in the flag associated with the calls to the builtins you can just put those line. https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
@@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK %s + +// Reproducer for issue #87758 +// The testcase below verifies that the "fast" flag are set on the calls. + +float sqrtf(float x); +float powf(float x, float y); +float fmaf(float x, float y, float z); + +#pragma float_control(push) +#pragma float_control(precise, off) +// CHECK-LABEL: define dso_local float @fp_precise_libm_calls( +// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[A]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call fast float @llvm.pow.f32(float [[TMP0]], float [[B]]) +// CHECK-NEXT:[[TMP2:%.*]] = tail call fast float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]]) +// CHECK-NEXT:ret float [[TMP2]] +// +float fp_precise_libm_calls(float a, float b, float c) { + a = sqrtf(a); + a = powf(a,b); + a = fmaf(a,b,c); + return a; +} +#pragma float_control(pop) zahiraam wrote: That's what I thought. Maybe you can keep this patch to fix the specific bug you are mentioning and someone or you may work on these other additional bugs. https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
@@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK %s + karka228 wrote: Sure, I will add that. https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
@@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ karka228 wrote: The only reason for using `-O3` was to get rid of all the allocas, loads and stores in the output that didn't contribute to the test. https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
@@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK %s + +// Reproducer for issue #87758 +// The testcase below verifies that the "fast" flag are set on the calls. + +float sqrtf(float x); +float powf(float x, float y); +float fmaf(float x, float y, float z); + +#pragma float_control(push) +#pragma float_control(precise, off) +// CHECK-LABEL: define dso_local float @fp_precise_libm_calls( +// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[A]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call fast float @llvm.pow.f32(float [[TMP0]], float [[B]]) +// CHECK-NEXT:[[TMP2:%.*]] = tail call fast float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]]) +// CHECK-NEXT:ret float [[TMP2]] +// +float fp_precise_libm_calls(float a, float b, float c) { + a = sqrtf(a); + a = powf(a,b); + a = fmaf(a,b,c); + return a; +} +#pragma float_control(pop) karka228 wrote: By combining `#pragma float_control(precise,on)` and `-ffast-math` to the testcase it revealed additional bugs, as the "fast" flag was still set. I tried to fix this by changing the function emitLibraryCall() but I'm a bit unsure if this is a correct fix. https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
https://github.com/karka228 updated https://github.com/llvm/llvm-project/pull/90377 >From 80836f51573c8ba474a0b5d5890a489d5f0c7034 Mon Sep 17 00:00:00 2001 From: Karl-Johan Karlsson Date: Sat, 27 Apr 2024 22:04:11 +0200 Subject: [PATCH 1/3] Pre-commit reproducer for fast-math flags on calls Adding test case related to issue #87758 The testcase show the faulty behavior where the calls to llvm.pow.f32 and llvm.fma.f32 are not attributed with the "fast" flag. --- clang/test/CodeGen/pr87758.c | 29 + 1 file changed, 29 insertions(+) create mode 100644 clang/test/CodeGen/pr87758.c diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c new file mode 100644 index 00..5c5a6de442c2ac --- /dev/null +++ b/clang/test/CodeGen/pr87758.c @@ -0,0 +1,29 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK %s + +// FIXME: Reproducer for issue #87758 +// The testcase below show the faulty behavior where the calls to +// llvm.pow.f32 and llvm.fma.f32 are not attributed with the "fast" flag. 
+ +float sqrtf(float x); +float powf(float x, float y); +float fmaf(float x, float y, float z); + +#pragma float_control(push) +#pragma float_control(precise, off) +// CHECK-LABEL: define dso_local float @fp_precise_libm_calls( +// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[A]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call float @llvm.pow.f32(float [[TMP0]], float [[B]]) +// CHECK-NEXT:[[TMP2:%.*]] = tail call float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]]) +// CHECK-NEXT:ret float [[TMP2]] +// +float fp_precise_libm_calls(float a, float b, float c) { + a = sqrtf(a); + a = powf(a,b); + a = fmaf(a,b,c); + return a; +} +#pragma float_control(pop) >From 4af840da9ec863f6372083f5e96998fb07db6b9c Mon Sep 17 00:00:00 2001 From: Karl-Johan Karlsson Date: Sat, 27 Apr 2024 22:37:34 +0200 Subject: [PATCH 2/3] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins This is a fix for the issue #87758 where fast-math flags are not propagated all builtins. It seems like pragmas with fast math flags was only propagated to calls of unary floating point builtins. This patch propagate them also for binary and ternary floating point builtins. 
--- clang/lib/CodeGen/CGBuiltin.cpp | 6 +++--- clang/test/CodeGen/pr87758.c| 9 - 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d08ab539148914..d61a63ac61572d 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -513,8 +513,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction , llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 }); } else { @@ -530,8 +530,8 @@ static Value *emitBinaryExpMaybeConstrainedFPBuiltin( llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, {Src0->getType(), Src1->getType()}); return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1}); @@ -551,8 +551,8 @@ static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction , llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 }); } else { diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c index 5c5a6de442c2ac..05b3232986e0a6 100644 --- a/clang/test/CodeGen/pr87758.c +++ b/clang/test/CodeGen/pr87758.c 
@@ -2,9 +2,8 @@ // RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ // RUN: | FileCheck -check-prefix=CHECK %s -// FIXME: Reproducer for issue #87758 -// The testcase below show the faulty behavior where the calls to -// llvm.pow.f32
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
@@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK %s + +// Reproducer for issue #87758 +// The testcase below verifies that the "fast" flag are set on the calls. + +float sqrtf(float x); +float powf(float x, float y); +float fmaf(float x, float y, float z); + +#pragma float_control(push) +#pragma float_control(precise, off) +// CHECK-LABEL: define dso_local float @fp_precise_libm_calls( +// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[A]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call fast float @llvm.pow.f32(float [[TMP0]], float [[B]]) +// CHECK-NEXT:[[TMP2:%.*]] = tail call fast float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]]) +// CHECK-NEXT:ret float [[TMP2]] +// +float fp_precise_libm_calls(float a, float b, float c) { + a = sqrtf(a); + a = powf(a,b); + a = fmaf(a,b,c); + return a; +} +#pragma float_control(pop) zahiraam wrote: You could also use the same test with `#pragma float_control(precise,on)` and confirm that the fast flags are not set. https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
@@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK %s + zahiraam wrote: How about adding a RUN line with the `-ffast-math` option? https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
@@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ zahiraam wrote: There is no need to use -O3 here to prove what you want, I think. https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
https://github.com/andykaylor approved this pull request. This looks right to me, but @zahiraam is more familiar with this code than I am. Zahira, do you have any comments? https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Karl-Johan Karlsson (karka228) Changes This is a fix for the issue #87758 where fast-math flags are not propagated all builtins. It seems like pragmas with fast math flags was only propagated to calls of unary floating point builtins. This patch propagate them also for binary and ternary floating point builtins. --- Full diff: https://github.com/llvm/llvm-project/pull/90377.diff 2 Files Affected: - (modified) clang/lib/CodeGen/CGBuiltin.cpp (+3-3) - (added) clang/test/CodeGen/pr87758.c (+28) ``diff diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d08ab539148914..d61a63ac61572d 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -513,8 +513,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction , llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 }); } else { @@ -530,8 +530,8 @@ static Value *emitBinaryExpMaybeConstrainedFPBuiltin( llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, {Src0->getType(), Src1->getType()}); return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1}); @@ -551,8 +551,8 @@ static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction , llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if 
(CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 }); } else { diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c new file mode 100644 index 00..05b3232986e0a6 --- /dev/null +++ b/clang/test/CodeGen/pr87758.c @@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK %s + +// Reproducer for issue #87758 +// The testcase below verifies that the "fast" flag are set on the calls. + +float sqrtf(float x); +float powf(float x, float y); +float fmaf(float x, float y, float z); + +#pragma float_control(push) +#pragma float_control(precise, off) +// CHECK-LABEL: define dso_local float @fp_precise_libm_calls( +// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[A]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call fast float @llvm.pow.f32(float [[TMP0]], float [[B]]) +// CHECK-NEXT:[[TMP2:%.*]] = tail call fast float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]]) +// CHECK-NEXT:ret float [[TMP2]] +// +float fp_precise_libm_calls(float a, float b, float c) { + a = sqrtf(a); + a = powf(a,b); + a = fmaf(a,b,c); + return a; +} +#pragma float_control(pop) `` https://github.com/llvm/llvm-project/pull/90377 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)
https://github.com/karka228 created https://github.com/llvm/llvm-project/pull/90377 This is a fix for the issue #87758 where fast-math flags are not propagated to all builtins. It seems like pragmas with fast-math flags were only propagated to calls of unary floating point builtins. This patch propagates them also for binary and ternary floating point builtins. >From 80836f51573c8ba474a0b5d5890a489d5f0c7034 Mon Sep 17 00:00:00 2001 From: Karl-Johan Karlsson Date: Sat, 27 Apr 2024 22:04:11 +0200 Subject: [PATCH 1/2] Pre-commit reproducer for fast-math flags on calls Adding test case related to issue #87758 The testcase show the faulty behavior where the calls to llvm.pow.f32 and llvm.fma.f32 are not attributed with the "fast" flag. --- clang/test/CodeGen/pr87758.c | 29 + 1 file changed, 29 insertions(+) create mode 100644 clang/test/CodeGen/pr87758.c diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c new file mode 100644 index 00..5c5a6de442c2ac --- /dev/null +++ b/clang/test/CodeGen/pr87758.c @@ -0,0 +1,29 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK %s + +// FIXME: Reproducer for issue #87758 +// The testcase below show the faulty behavior where the calls to +// llvm.pow.f32 and llvm.fma.f32 are not attributed with the "fast" flag. 
+ +float sqrtf(float x); +float powf(float x, float y); +float fmaf(float x, float y, float z); + +#pragma float_control(push) +#pragma float_control(precise, off) +// CHECK-LABEL: define dso_local float @fp_precise_libm_calls( +// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[A]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call float @llvm.pow.f32(float [[TMP0]], float [[B]]) +// CHECK-NEXT:[[TMP2:%.*]] = tail call float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]]) +// CHECK-NEXT:ret float [[TMP2]] +// +float fp_precise_libm_calls(float a, float b, float c) { + a = sqrtf(a); + a = powf(a,b); + a = fmaf(a,b,c); + return a; +} +#pragma float_control(pop) >From 4af840da9ec863f6372083f5e96998fb07db6b9c Mon Sep 17 00:00:00 2001 From: Karl-Johan Karlsson Date: Sat, 27 Apr 2024 22:37:34 +0200 Subject: [PATCH 2/2] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins This is a fix for the issue #87758 where fast-math flags are not propagated all builtins. It seems like pragmas with fast math flags was only propagated to calls of unary floating point builtins. This patch propagate them also for binary and ternary floating point builtins. 
--- clang/lib/CodeGen/CGBuiltin.cpp | 6 +++--- clang/test/CodeGen/pr87758.c| 9 - 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d08ab539148914..d61a63ac61572d 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -513,8 +513,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction , llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 }); } else { @@ -530,8 +530,8 @@ static Value *emitBinaryExpMaybeConstrainedFPBuiltin( llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, {Src0->getType(), Src1->getType()}); return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1}); @@ -551,8 +551,8 @@ static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction , llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { -CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 }); } else { diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c index 5c5a6de442c2ac..05b3232986e0a6 100644 --- a/clang/test/CodeGen/pr87758.c +++