https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/179025
We were folding undef inputs to qnan, which is incorrect: the instruction never returns nan. An out-of-bounds segment select returns 0, so fold an undef segment operand to 0.
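As an illustration (not part of the patch), here is roughly what the corrected folds look like in IR; the function names are made up, but the folded results mirror the updated tests in the diff below:

  ; An undef segment operand acts like an out-of-bounds segment select, which
  ; returns 0, so the call folds to +0.0 rather than to qnan.
  define double @fold_undef_segment(double %arg) {
    %val = call double @llvm.amdgcn.trig.preop.f64(double %arg, i32 undef)
    ret double %val   ; instcombine now produces: ret double 0.000000e+00
  }

  ; A nan source is no longer folded to qnan; with a non-constant segment the
  ; call is left alone.
  define double @keep_nan_source(i32 %seg) {
    %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF8000000000000, i32 %seg)
    ret double %val
  }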
>From c9a865a5e61f41348d1f03df389893cce3e618f0 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Sat, 31 Jan 2026 09:59:03 +0100
Subject: [PATCH] AMDGPU: Fix incorrect fold of undef for llvm.amdgcn.trig.preop

We were folding undef inputs to qnan which is incorrect. The instruction
never returns nan. Out of bounds segment select will return 0, so fold
undef segment to 0.
---
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp   | 30 +++++-----
 .../InstCombine/AMDGPU/amdgcn-intrinsics.ll | 57 ++++++++++---------
 2 files changed, 44 insertions(+), 43 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 2cd1902785546..467236e57863a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1459,30 +1459,30 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     if (isa<PoisonValue>(Src) || isa<PoisonValue>(Segment))
       return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));
 
-    if (isa<UndefValue>(Src)) {
-      auto *QNaN = ConstantFP::get(
-          II.getType(), APFloat::getQNaN(II.getType()->getFltSemantics()));
-      return IC.replaceInstUsesWith(II, QNaN);
-    }
+    if (isa<UndefValue>(Segment))
+      return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType()));
 
-    const ConstantFP *Csrc = dyn_cast<ConstantFP>(Src);
-    if (!Csrc)
+    if (II.isStrictFP())
       break;
 
-    if (II.isStrictFP())
+    const ConstantFP *CSrc = dyn_cast<ConstantFP>(Src);
+    if (!CSrc && !isa<UndefValue>(Src))
       break;
 
-    const APFloat &Fsrc = Csrc->getValueAPF();
-    if (Fsrc.isNaN()) {
-      auto *Quieted = ConstantFP::get(II.getType(), Fsrc.makeQuiet());
-      return IC.replaceInstUsesWith(II, Quieted);
-    }
+    // The instruction ignores special cases, and literally just extracts the
+    // exponents. Fold undef to nan, and index the table as normal.
+    APInt FSrcInt = CSrc ? CSrc->getValueAPF().bitcastToAPInt()
+                         : APFloat::getQNaN(II.getType()->getFltSemantics())
+                               .bitcastToAPInt();
 
     const ConstantInt *Cseg = dyn_cast<ConstantInt>(Segment);
-    if (!Cseg)
+    if (!Cseg) {
+      if (isa<UndefValue>(Src))
+        return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType()));
       break;
+    }
 
-    unsigned Exponent = Fsrc.bitcastToAPInt().extractBitsAsZExtValue(11, 52);
+    unsigned Exponent = FSrcInt.extractBitsAsZExtValue(11, 52);
     unsigned SegmentVal = Cseg->getValue().trunc(5).getZExtValue();
     unsigned Shift = SegmentVal * 53;
     if (Exponent > 1077)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 3ff9439040438..45e7896aaa7b7 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -74,7 +74,7 @@ define double @test_constant_fold_rcp_f64_43() nounwind {
 
 define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp {
 ; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR17:[0-9]+]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR18:[0-9]+]]
 ; CHECK-NEXT: ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone
@@ -139,7 +139,7 @@ define half @test_constant_fold_sqrt_f16_0() nounwind {
 
 define float @test_constant_fold_sqrt_f32_0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_sqrt_f32_0(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR18:[0-9]+]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR19:[0-9]+]]
 ; CHECK-NEXT: ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.sqrt.f32(float 0.0) nounwind readnone
@@ -148,7 +148,7 @@ define float @test_constant_fold_sqrt_f32_0() nounwind {
 
 define double @test_constant_fold_sqrt_f64_0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_sqrt_f64_0(
-; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0.000000e+00) #[[ATTR18]]
+; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0.000000e+00) #[[ATTR19]]
 ; CHECK-NEXT: ret double [[VAL]]
 ;
   %val = call double @llvm.amdgcn.sqrt.f64(double 0.0) nounwind readnone
@@ -165,7 +165,7 @@ define half @test_constant_fold_sqrt_f16_neg0() nounwind {
 
 define float @test_constant_fold_sqrt_f32_neg0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_sqrt_f32_neg0(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float -0.000000e+00) #[[ATTR18]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float -0.000000e+00) #[[ATTR19]]
 ; CHECK-NEXT: ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.sqrt.f32(float -0.0) nounwind readnone
@@ -174,7 +174,7 @@ define float @test_constant_fold_sqrt_f32_neg0() nounwind {
 
 define double @test_constant_fold_sqrt_f64_neg0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_sqrt_f64_neg0(
-; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -0.000000e+00) #[[ATTR18]]
+; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -0.000000e+00) #[[ATTR19]]
 ; CHECK-NEXT: ret double [[VAL]]
 ;
   %val = call double @llvm.amdgcn.sqrt.f64(double -0.0) nounwind readnone
@@ -766,7 +766,7 @@ define i1 @test_class_isnan_f32(float %x) nounwind {
 
 define i1 @test_class_isnan_f32_strict(float %x) nounwind strictfp {
 ; CHECK-LABEL: @test_class_isnan_f32_strict(
-; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 3) #[[ATTR19:[0-9]+]]
+; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 3) #[[ATTR20:[0-9]+]]
 ; CHECK-NEXT: ret i1 [[VAL]]
 ;
   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3) strictfp
@@ -784,7 +784,7 @@ define i1 @test_class_is_p0_n0_f32(float %x) nounwind {
 
 define i1 @test_class_is_p0_n0_f32_strict(float %x) nounwind strictfp {
 ; CHECK-LABEL: @test_class_is_p0_n0_f32_strict(
-; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 96) #[[ATTR19]]
+; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 96) #[[ATTR20]]
 ; CHECK-NEXT: ret i1 [[VAL]]
 ;
   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 96) strictfp
@@ -1882,7 +1882,7 @@ define i64 @icmp_constant_inputs_false() {
 
 define i64 @icmp_constant_inputs_true() {
 ; CHECK-LABEL: @icmp_constant_inputs_true(
-; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR20:[0-9]+]]
+; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR21:[0-9]+]]
 ; CHECK-NEXT: ret i64 [[RESULT]]
 ;
   %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34)
@@ -2589,7 +2589,7 @@ define i64 @fcmp_constant_inputs_false() {
 
 define i64 @fcmp_constant_inputs_true() {
 ; CHECK-LABEL: @fcmp_constant_inputs_true(
-; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR20]]
+; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR21]]
 ; CHECK-NEXT: ret i64 [[RESULT]]
 ;
   %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)
@@ -5614,7 +5614,7 @@ declare float @llvm.amdgcn.trig.preop.f32(float, i32)
 
 define double @trig_preop_constfold_variable_undef_arg(i32 %arg) {
 ; CHECK-LABEL: @trig_preop_constfold_variable_undef_arg(
-; CHECK-NEXT: ret double 0x7FF8000000000000
+; CHECK-NEXT: ret double 0.000000e+00
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double undef, i32 %arg)
   ret double %val
@@ -5630,8 +5630,7 @@ define double @trig_preop_constfold_variable_poison_arg(i32 %arg) {
 
 define double @trig_preop_constfold_variable_arg_undef(double %arg) {
 ; CHECK-LABEL: @trig_preop_constfold_variable_arg_undef(
-; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[ARG:%.*]], i32 undef)
-; CHECK-NEXT: ret double [[VAL]]
+; CHECK-NEXT: ret double 0.000000e+00
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double %arg, i32 undef)
   ret double %val
@@ -5656,7 +5655,8 @@ define double @trig_preop_constfold_variable_int(i32 %arg) {
 
 define double @trig_preop_qnan(i32 %arg) {
 ; CHECK-LABEL: @trig_preop_qnan(
-; CHECK-NEXT: ret double 0x7FF8000000000000
+; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF8000000000000, i32 [[ARG:%.*]])
+; CHECK-NEXT: ret double [[VAL]]
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF8000000000000, i32 %arg)
   ret double %val
@@ -5664,7 +5664,8 @@ define double @trig_preop_qnan(i32 %arg) {
 
 define double @trig_preop_snan(i32 %arg) {
 ; CHECK-LABEL: @trig_preop_snan(
-; CHECK-NEXT: ret double 0x7FF8000000000001
+; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF0000000000001, i32 [[ARG:%.*]])
+; CHECK-NEXT: ret double [[VAL]]
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF0000000000001, i32 %arg)
   ret double %val
@@ -5741,7 +5742,7 @@ define double @trig_preop_constfold_neg32_segment() {
 
 define double @trig_preop_constfold_strictfp() strictfp {
 ; CHECK-LABEL: @trig_preop_constfold_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) #[[ATTR19]]
+; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) #[[ATTR20]]
 ; CHECK-NEXT: ret double [[VAL]]
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) strictfp
@@ -6110,7 +6111,7 @@ define half @test_constant_fold_log_f16_neg10() {
 
 define float @test_constant_fold_log_f32_qnan_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_log_f32_qnan_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) #[[ATTR19]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) #[[ATTR20]]
 ; CHECK-NEXT: ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) strictfp
@@ -6119,7 +6120,7 @@ define float @test_constant_fold_log_f32_qnan_strictfp() strictfp {
 
 define float @test_constant_fold_log_f32_0_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_log_f32_0_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0.000000e+00) #[[ATTR19]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0.000000e+00) #[[ATTR20]]
 ; CHECK-NEXT: ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.log.f32(float 0.0) strictfp
@@ -6128,7 +6129,7 @@ define float @test_constant_fold_log_f32_0_strictfp() strictfp {
 
 define float @test_constant_fold_log_f32_neg0_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_log_f32_neg0_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -0.000000e+00) #[[ATTR19]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -0.000000e+00) #[[ATTR20]]
 ; CHECK-NEXT: ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.log.f32(float -0.0) strictfp
@@ -6137,7 +6138,7 @@ define float @test_constant_fold_log_f32_neg0_strictfp() strictfp {
 
 define float @test_constant_fold_log_f32_neg_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_log_f32_neg_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -1.000000e+01) #[[ATTR19]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -1.000000e+01) #[[ATTR20]]
 ; CHECK-NEXT: ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.log.f32(float -10.0) strictfp
@@ -6154,7 +6155,7 @@ define float @test_constant_fold_log_f32_pinf_strictfp() strictfp {
 
 define float @test_constant_fold_log_f32_ninf_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_log_f32_ninf_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) #[[ATTR19]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) #[[ATTR20]]
 ; CHECK-NEXT: ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) strictfp
@@ -6356,7 +6357,7 @@ define half @test_constant_fold_exp2_f16_neg10() {
 
 define float @test_constant_fold_exp2_f32_qnan_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_qnan_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) #[[ATTR19]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) #[[ATTR20]]
 ; CHECK-NEXT: ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) strictfp
@@ -6365,7 +6366,7 @@ define float @test_constant_fold_exp2_f32_qnan_strictfp() strictfp {
 
 define float @test_constant_fold_exp2_f32_0_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_0_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0.000000e+00) #[[ATTR19]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0.000000e+00) #[[ATTR20]]
 ; CHECK-NEXT: ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.exp2.f32(float 0.0) strictfp
@@ -6374,7 +6375,7 @@ define float @test_constant_fold_exp2_f32_0_strictfp() strictfp {
 
 define float @test_constant_fold_exp2_f32_neg0_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg0_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -0.000000e+00) #[[ATTR19]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -0.000000e+00) #[[ATTR20]]
 ; CHECK-NEXT: ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.exp2.f32(float -0.0) strictfp
@@ -6383,7 +6384,7 @@ define float @test_constant_fold_exp2_f32_neg0_strictfp() strictfp {
 
 define float @test_constant_fold_exp2_f32_1_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_1_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 1.000000e+00) #[[ATTR19]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 1.000000e+00) #[[ATTR20]]
 ; CHECK-NEXT: ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.exp2.f32(float 1.0) strictfp
@@ -6392,7 +6393,7 @@ define float @test_constant_fold_exp2_f32_1_strictfp() strictfp {
 
 define float @test_constant_fold_exp2_f32_neg1_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg1_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+00) #[[ATTR19]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+00) #[[ATTR20]]
 ; CHECK-NEXT: ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.exp2.f32(float -1.0) strictfp
@@ -6401,7 +6402,7 @@ define float @test_constant_fold_exp2_f32_neg1_strictfp() strictfp {
 
 define float @test_constant_fold_exp2_f32_2_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_2_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 2.000000e+00) #[[ATTR19]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 2.000000e+00) #[[ATTR20]]
 ; CHECK-NEXT: ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.exp2.f32(float 2.0) strictfp
@@ -6410,7 +6411,7 @@ define float @test_constant_fold_exp2_f32_2_strictfp() strictfp {
 
 define float @test_constant_fold_exp2_f32_neg2_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg2_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -2.000000e+00) #[[ATTR19]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -2.000000e+00) #[[ATTR20]]
 ; CHECK-NEXT: ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.exp2.f32(float -2.0) strictfp
@@ -6419,7 +6420,7 @@ define float @test_constant_fold_exp2_f32_neg2_strictfp() strictfp {
 
 define float @test_constant_fold_exp2_f32_neg_strictfp() strictfp {
 ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+01) #[[ATTR19]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+01) #[[ATTR20]]
 ; CHECK-NEXT: ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.exp2.f32(float -10.0) strictfp

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
