llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> We were folding undef inputs to qnan which is incorrect. The instruction never returns nan. Out of bounds segment select will return 0, so fold undef segment to 0. --- Full diff: https://github.com/llvm/llvm-project/pull/179025.diff 2 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+15-15) - (modified) llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll (+29-28) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 2cd1902785546..467236e57863a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -1459,30 +1459,30 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { if (isa<PoisonValue>(Src) || isa<PoisonValue>(Segment)) return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType())); - if (isa<UndefValue>(Src)) { - auto *QNaN = ConstantFP::get( - II.getType(), APFloat::getQNaN(II.getType()->getFltSemantics())); - return IC.replaceInstUsesWith(II, QNaN); - } + if (isa<UndefValue>(Segment)) + return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType())); - const ConstantFP *Csrc = dyn_cast<ConstantFP>(Src); - if (!Csrc) + if (II.isStrictFP()) break; - if (II.isStrictFP()) + const ConstantFP *CSrc = dyn_cast<ConstantFP>(Src); + if (!CSrc && !isa<UndefValue>(Src)) break; - const APFloat &Fsrc = Csrc->getValueAPF(); - if (Fsrc.isNaN()) { - auto *Quieted = ConstantFP::get(II.getType(), Fsrc.makeQuiet()); - return IC.replaceInstUsesWith(II, Quieted); - } + // The instruction ignores special cases, and literally just extracts the + // exponents. Fold undef to nan, and index the table as normal. + APInt FSrcInt = CSrc ? 
CSrc->getValueAPF().bitcastToAPInt() + : APFloat::getQNaN(II.getType()->getFltSemantics()) + .bitcastToAPInt(); const ConstantInt *Cseg = dyn_cast<ConstantInt>(Segment); - if (!Cseg) + if (!Cseg) { + if (isa<UndefValue>(Src)) + return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType())); break; + } - unsigned Exponent = Fsrc.bitcastToAPInt().extractBitsAsZExtValue(11, 52); + unsigned Exponent = FSrcInt.extractBitsAsZExtValue(11, 52); unsigned SegmentVal = Cseg->getValue().trunc(5).getZExtValue(); unsigned Shift = SegmentVal * 53; if (Exponent > 1077) diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll index 3ff9439040438..45e7896aaa7b7 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -74,7 +74,7 @@ define double @test_constant_fold_rcp_f64_43() nounwind { define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp { ; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR17:[0-9]+]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR18:[0-9]+]] ; CHECK-NEXT: ret float [[VAL]] ; %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone @@ -139,7 +139,7 @@ define half @test_constant_fold_sqrt_f16_0() nounwind { define float @test_constant_fold_sqrt_f32_0() nounwind { ; CHECK-LABEL: @test_constant_fold_sqrt_f32_0( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR18:[0-9]+]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR19:[0-9]+]] ; CHECK-NEXT: ret float [[VAL]] ; %val = call float @llvm.amdgcn.sqrt.f32(float 0.0) nounwind readnone @@ -148,7 +148,7 @@ define float @test_constant_fold_sqrt_f32_0() nounwind { define double 
@test_constant_fold_sqrt_f64_0() nounwind { ; CHECK-LABEL: @test_constant_fold_sqrt_f64_0( -; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0.000000e+00) #[[ATTR18]] +; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0.000000e+00) #[[ATTR19]] ; CHECK-NEXT: ret double [[VAL]] ; %val = call double @llvm.amdgcn.sqrt.f64(double 0.0) nounwind readnone @@ -165,7 +165,7 @@ define half @test_constant_fold_sqrt_f16_neg0() nounwind { define float @test_constant_fold_sqrt_f32_neg0() nounwind { ; CHECK-LABEL: @test_constant_fold_sqrt_f32_neg0( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float -0.000000e+00) #[[ATTR18]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float -0.000000e+00) #[[ATTR19]] ; CHECK-NEXT: ret float [[VAL]] ; %val = call float @llvm.amdgcn.sqrt.f32(float -0.0) nounwind readnone @@ -174,7 +174,7 @@ define float @test_constant_fold_sqrt_f32_neg0() nounwind { define double @test_constant_fold_sqrt_f64_neg0() nounwind { ; CHECK-LABEL: @test_constant_fold_sqrt_f64_neg0( -; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -0.000000e+00) #[[ATTR18]] +; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -0.000000e+00) #[[ATTR19]] ; CHECK-NEXT: ret double [[VAL]] ; %val = call double @llvm.amdgcn.sqrt.f64(double -0.0) nounwind readnone @@ -766,7 +766,7 @@ define i1 @test_class_isnan_f32(float %x) nounwind { define i1 @test_class_isnan_f32_strict(float %x) nounwind strictfp { ; CHECK-LABEL: @test_class_isnan_f32_strict( -; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 3) #[[ATTR19:[0-9]+]] +; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 3) #[[ATTR20:[0-9]+]] ; CHECK-NEXT: ret i1 [[VAL]] ; %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3) strictfp @@ -784,7 +784,7 @@ define i1 @test_class_is_p0_n0_f32(float %x) nounwind { define i1 @test_class_is_p0_n0_f32_strict(float %x) nounwind strictfp { ; 
CHECK-LABEL: @test_class_is_p0_n0_f32_strict( -; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 96) #[[ATTR19]] +; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 96) #[[ATTR20]] ; CHECK-NEXT: ret i1 [[VAL]] ; %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 96) strictfp @@ -1882,7 +1882,7 @@ define i64 @icmp_constant_inputs_false() { define i64 @icmp_constant_inputs_true() { ; CHECK-LABEL: @icmp_constant_inputs_true( -; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR20:[0-9]+]] +; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR21:[0-9]+]] ; CHECK-NEXT: ret i64 [[RESULT]] ; %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34) @@ -2589,7 +2589,7 @@ define i64 @fcmp_constant_inputs_false() { define i64 @fcmp_constant_inputs_true() { ; CHECK-LABEL: @fcmp_constant_inputs_true( -; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR20]] +; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR21]] ; CHECK-NEXT: ret i64 [[RESULT]] ; %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4) @@ -5614,7 +5614,7 @@ declare float @llvm.amdgcn.trig.preop.f32(float, i32) define double @trig_preop_constfold_variable_undef_arg(i32 %arg) { ; CHECK-LABEL: @trig_preop_constfold_variable_undef_arg( -; CHECK-NEXT: ret double 0x7FF8000000000000 +; CHECK-NEXT: ret double 0.000000e+00 ; %val = call double @llvm.amdgcn.trig.preop.f64(double undef, i32 %arg) ret double %val @@ -5630,8 +5630,7 @@ define double @trig_preop_constfold_variable_poison_arg(i32 %arg) { define double @trig_preop_constfold_variable_arg_undef(double %arg) { ; CHECK-LABEL: @trig_preop_constfold_variable_arg_undef( -; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[ARG:%.*]], i32 undef) -; CHECK-NEXT: ret double [[VAL]] +; CHECK-NEXT: 
ret double 0.000000e+00 ; %val = call double @llvm.amdgcn.trig.preop.f64(double %arg, i32 undef) ret double %val @@ -5656,7 +5655,8 @@ define double @trig_preop_constfold_variable_int(i32 %arg) { define double @trig_preop_qnan(i32 %arg) { ; CHECK-LABEL: @trig_preop_qnan( -; CHECK-NEXT: ret double 0x7FF8000000000000 +; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF8000000000000, i32 [[ARG:%.*]]) +; CHECK-NEXT: ret double [[VAL]] ; %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF8000000000000, i32 %arg) ret double %val @@ -5664,7 +5664,8 @@ define double @trig_preop_qnan(i32 %arg) { define double @trig_preop_snan(i32 %arg) { ; CHECK-LABEL: @trig_preop_snan( -; CHECK-NEXT: ret double 0x7FF8000000000001 +; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF0000000000001, i32 [[ARG:%.*]]) +; CHECK-NEXT: ret double [[VAL]] ; %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF0000000000001, i32 %arg) ret double %val @@ -5741,7 +5742,7 @@ define double @trig_preop_constfold_neg32_segment() { define double @trig_preop_constfold_strictfp() strictfp { ; CHECK-LABEL: @trig_preop_constfold_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) #[[ATTR19]] +; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) #[[ATTR20]] ; CHECK-NEXT: ret double [[VAL]] ; %val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) strictfp @@ -6110,7 +6111,7 @@ define half @test_constant_fold_log_f16_neg10() { define float @test_constant_fold_log_f32_qnan_strictfp() strictfp { ; CHECK-LABEL: @test_constant_fold_log_f32_qnan_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) #[[ATTR19]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) #[[ATTR20]] ; CHECK-NEXT: ret float [[VAL]] ; %val = call float @llvm.amdgcn.log.f32(float 
0x7FF8000000000000) strictfp @@ -6119,7 +6120,7 @@ define float @test_constant_fold_log_f32_qnan_strictfp() strictfp { define float @test_constant_fold_log_f32_0_strictfp() strictfp { ; CHECK-LABEL: @test_constant_fold_log_f32_0_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0.000000e+00) #[[ATTR19]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0.000000e+00) #[[ATTR20]] ; CHECK-NEXT: ret float [[VAL]] ; %val = call float @llvm.amdgcn.log.f32(float 0.0) strictfp @@ -6128,7 +6129,7 @@ define float @test_constant_fold_log_f32_0_strictfp() strictfp { define float @test_constant_fold_log_f32_neg0_strictfp() strictfp { ; CHECK-LABEL: @test_constant_fold_log_f32_neg0_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -0.000000e+00) #[[ATTR19]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -0.000000e+00) #[[ATTR20]] ; CHECK-NEXT: ret float [[VAL]] ; %val = call float @llvm.amdgcn.log.f32(float -0.0) strictfp @@ -6137,7 +6138,7 @@ define float @test_constant_fold_log_f32_neg0_strictfp() strictfp { define float @test_constant_fold_log_f32_neg_strictfp() strictfp { ; CHECK-LABEL: @test_constant_fold_log_f32_neg_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -1.000000e+01) #[[ATTR19]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -1.000000e+01) #[[ATTR20]] ; CHECK-NEXT: ret float [[VAL]] ; %val = call float @llvm.amdgcn.log.f32(float -10.0) strictfp @@ -6154,7 +6155,7 @@ define float @test_constant_fold_log_f32_pinf_strictfp() strictfp { define float @test_constant_fold_log_f32_ninf_strictfp() strictfp { ; CHECK-LABEL: @test_constant_fold_log_f32_ninf_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) #[[ATTR19]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) #[[ATTR20]] ; CHECK-NEXT: ret float [[VAL]] ; %val = call float 
@llvm.amdgcn.log.f32(float 0xFFF0000000000000) strictfp @@ -6356,7 +6357,7 @@ define half @test_constant_fold_exp2_f16_neg10() { define float @test_constant_fold_exp2_f32_qnan_strictfp() strictfp { ; CHECK-LABEL: @test_constant_fold_exp2_f32_qnan_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) #[[ATTR19]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) #[[ATTR20]] ; CHECK-NEXT: ret float [[VAL]] ; %val = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) strictfp @@ -6365,7 +6366,7 @@ define float @test_constant_fold_exp2_f32_qnan_strictfp() strictfp { define float @test_constant_fold_exp2_f32_0_strictfp() strictfp { ; CHECK-LABEL: @test_constant_fold_exp2_f32_0_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0.000000e+00) #[[ATTR19]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0.000000e+00) #[[ATTR20]] ; CHECK-NEXT: ret float [[VAL]] ; %val = call float @llvm.amdgcn.exp2.f32(float 0.0) strictfp @@ -6374,7 +6375,7 @@ define float @test_constant_fold_exp2_f32_0_strictfp() strictfp { define float @test_constant_fold_exp2_f32_neg0_strictfp() strictfp { ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg0_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -0.000000e+00) #[[ATTR19]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -0.000000e+00) #[[ATTR20]] ; CHECK-NEXT: ret float [[VAL]] ; %val = call float @llvm.amdgcn.exp2.f32(float -0.0) strictfp @@ -6383,7 +6384,7 @@ define float @test_constant_fold_exp2_f32_neg0_strictfp() strictfp { define float @test_constant_fold_exp2_f32_1_strictfp() strictfp { ; CHECK-LABEL: @test_constant_fold_exp2_f32_1_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 1.000000e+00) #[[ATTR19]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 1.000000e+00) #[[ATTR20]] ; CHECK-NEXT: ret float [[VAL]] ; 
%val = call float @llvm.amdgcn.exp2.f32(float 1.0) strictfp @@ -6392,7 +6393,7 @@ define float @test_constant_fold_exp2_f32_1_strictfp() strictfp { define float @test_constant_fold_exp2_f32_neg1_strictfp() strictfp { ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg1_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+00) #[[ATTR19]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+00) #[[ATTR20]] ; CHECK-NEXT: ret float [[VAL]] ; %val = call float @llvm.amdgcn.exp2.f32(float -1.0) strictfp @@ -6401,7 +6402,7 @@ define float @test_constant_fold_exp2_f32_neg1_strictfp() strictfp { define float @test_constant_fold_exp2_f32_2_strictfp() strictfp { ; CHECK-LABEL: @test_constant_fold_exp2_f32_2_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 2.000000e+00) #[[ATTR19]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 2.000000e+00) #[[ATTR20]] ; CHECK-NEXT: ret float [[VAL]] ; %val = call float @llvm.amdgcn.exp2.f32(float 2.0) strictfp @@ -6410,7 +6411,7 @@ define float @test_constant_fold_exp2_f32_2_strictfp() strictfp { define float @test_constant_fold_exp2_f32_neg2_strictfp() strictfp { ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg2_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -2.000000e+00) #[[ATTR19]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -2.000000e+00) #[[ATTR20]] ; CHECK-NEXT: ret float [[VAL]] ; %val = call float @llvm.amdgcn.exp2.f32(float -2.0) strictfp @@ -6419,7 +6420,7 @@ define float @test_constant_fold_exp2_f32_neg2_strictfp() strictfp { define float @test_constant_fold_exp2_f32_neg_strictfp() strictfp { ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+01) #[[ATTR19]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+01) #[[ATTR20]] ; CHECK-NEXT: ret float [[VAL]] ; 
%val = call float @llvm.amdgcn.exp2.f32(float -10.0) strictfp `````````` </details> https://github.com/llvm/llvm-project/pull/179025 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
