https://github.com/snikitav updated https://github.com/llvm/llvm-project/pull/74175
>From cb2bdf4a4cb9db2262920a0a474e2024e7a1406a Mon Sep 17 00:00:00 2001 From: Sizov Nikita <s.nikit...@gmail.com> Date: Sat, 2 Dec 2023 04:53:32 +0300 Subject: [PATCH] Missing opt with ctlz and shifts of power of 2 constants (#41333) --- clang/test/CXX/drs/dr2xx.cpp | 2 +- .../InstCombine/InstCombineCalls.cpp | 33 +++ .../InstCombine/ctlz-cttz-shifts.ll | 243 ++++++++++++++++++ 3 files changed, 277 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll diff --git a/clang/test/CXX/drs/dr2xx.cpp b/clang/test/CXX/drs/dr2xx.cpp index 4dd6d7599f2a0..170753e5efce4 100644 --- a/clang/test/CXX/drs/dr2xx.cpp +++ b/clang/test/CXX/drs/dr2xx.cpp @@ -1298,7 +1298,7 @@ namespace dr299 { // dr299: 2.8 c++11 // cxx98-11-error@#dr299-q {{ambiguous conversion of array size expression of type 'T' to an integral or enumeration type}} // cxx98-11-note@#dr299-int {{conversion to integral type 'int' declared here}} // cxx98-11-note@#dr299-ushort {{conversion to integral type 'unsigned short' declared here}} - // since-cxx14-error-re@#dr299-q {{{{conversion from 'T' to 'unsigned (long|int)' is ambiguous}}}} + // since-cxx14-error-re@#dr299-q {{conversion from 'T' to 'unsigned (long|int)' is ambiguous}} // since-cxx14-note@#dr299-int {{candidate function}} // since-cxx14-note@#dr299-ushort {{candidate function}} } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index a991f0906052a..f6322b3f4f415 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -514,6 +514,8 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType())); } + Constant *C; + if (IsTZ) { // cttz(-x) -> cttz(x) if (match(Op0, m_Neg(m_Value(X)))) @@ -549,6 +551,37 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) return IC.replaceOperand(II, 0, X); + + // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val) + if (match(Op0, m_Shl(m_Constant(C), m_Value(X))) && match(Op1, m_One())) { + Value *ConstCttz = + IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1); + return BinaryOperator::CreateAdd(ConstCttz, X); + } + + // cttz(lshr exact (%const, %val), 0) --> sub(cttz(%const, 0), %val) + if (match(Op0, m_Exact(m_LShr(m_Constant(C), m_Value(X))))) { + Value *ConstCttz = + IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1); + return BinaryOperator::CreateSub(ConstCttz, X); + } + } else { + // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val) + if (match(Op0, m_LShr(m_Constant(C), m_Value(X))) && match(Op1, m_One())) { + Value *ConstCtlz = + IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1); + return BinaryOperator::CreateAdd(ConstCtlz, X); + } + + // ctlz(shl nuw (%const, %val), 0) | + // ctlz(shl nsw (%const, %val), 0) |--> sub(ctlz(%const, 0), %val) + // ctlz(shl nuw nsw (%const, %val), 0) | + if (match(Op0, m_NUWShl(m_Constant(C), m_Value(X))) || + match(Op0, m_NSWShl(m_Constant(C), m_Value(X)))) { + Value *ConstCtlz = + IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1); + return BinaryOperator::CreateSub(ConstCtlz, X); + } } KnownBits Known = IC.computeKnownBits(Op0, 0, &II); diff --git a/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll b/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll new file mode 100644 index 0000000000000..5abe5ab6c9310 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll @@ -0,0 +1,243 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=instcombine -S < %s | FileCheck %s + +declare i32 @llvm.ctlz.i32(i32, i1) +declare i32 @llvm.cttz.i32(i32, i1) +declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) +declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) + +define i32 @lshr_ctlz_true(i32) { +; CHECK-LABEL: define i32 @lshr_ctlz_true( +; CHECK-SAME: i32 [[TMP0:%.*]]) { +; CHECK-NEXT: [[CTLZ:%.*]] = add i32 [[TMP0]], 9 +; CHECK-NEXT: ret i32 [[CTLZ]] +; + %lshr = lshr i32 8387584, %0 + %ctlz = call i32 @llvm.ctlz.i32(i32 %lshr, i1 true) + ret i32 %ctlz +} + +define i32 @shl_nuw_ctlz_false(i32) { +; CHECK-LABEL: define i32 @shl_nuw_ctlz_false( +; CHECK-SAME: i32 [[TMP0:%.*]]) { +; CHECK-NEXT: [[CTLZ:%.*]] = sub i32 9, [[TMP0]] +; CHECK-NEXT: ret i32 [[CTLZ]] +; + %shl = shl nuw i32 8387584, %0 + %ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 false) + ret i32 %ctlz +} + +define i32 @shl_nsw_ctlz_false(i32) { +; CHECK-LABEL: define i32 @shl_nsw_ctlz_false( +; CHECK-SAME: i32 [[TMP0:%.*]]) { +; CHECK-NEXT: [[CTLZ:%.*]] = sub i32 9, [[TMP0]] +; CHECK-NEXT: ret i32 [[CTLZ]] +; + %shl = shl nsw i32 8387584, %0 + %ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 false) + ret i32 %ctlz +} + +define i32 @shl_nuw_nsw_ctlz_false(i32) { +; CHECK-LABEL: define i32 @shl_nuw_nsw_ctlz_false( +; CHECK-SAME: i32 [[TMP0:%.*]]) { +; CHECK-NEXT: [[CTLZ:%.*]] = sub i32 9, [[TMP0]] +; CHECK-NEXT: ret i32 [[CTLZ]] +; + %shl = shl nuw nsw i32 8387584, %0 + %ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 false) + ret i32 %ctlz +} + +define i32 @lshr_exact_cttz_false(i32) { +; CHECK-LABEL: define i32 @lshr_exact_cttz_false( +; CHECK-SAME: i32 [[TMP0:%.*]]) { +; CHECK-NEXT: [[CTTZ:%.*]] = sub i32 10, [[TMP0]] +; CHECK-NEXT: ret i32 [[CTTZ]] +; + %lshr = lshr exact i32 8387584, %0 + %cttz = call i32 @llvm.cttz.i32(i32 %lshr, i1 false) + ret i32 %cttz +} + +define i32 @shl_cttz_true(i32) { +; CHECK-LABEL: define i32 @shl_cttz_true( +; CHECK-SAME: i32 [[TMP0:%.*]]) { +; CHECK-NEXT: [[CTTZ:%.*]] = add i32 [[TMP0]], 10 +; CHECK-NEXT: ret i32 [[CTTZ]] +; + %shl = shl i32 8387584, %0 + %cttz = call i32 @llvm.cttz.i32(i32 %shl, i1 true) + ret i32 %cttz +} + +define <2 x i32> @vec2_lshr_ctlz_true(<2 x i32>) { +; CHECK-LABEL: define <2 x i32> @vec2_lshr_ctlz_true( +; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) { +; CHECK-NEXT: [[CTLZ:%.*]] = add <2 x i32> [[TMP0]], <i32 9, i32 9> +; CHECK-NEXT: ret <2 x i32> [[CTLZ]] +; + %div = lshr <2 x i32> <i32 8387584, i32 4276440>, %0 + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %div, i1 true) + ret <2 x i32> %ctlz +} + +define <2 x i32> @vec2_shl_nuw_ctlz_false(<2 x i32>) { +; CHECK-LABEL: define <2 x i32> @vec2_shl_nuw_ctlz_false( +; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) { +; CHECK-NEXT: [[CTLZ:%.*]] = sub <2 x i32> <i32 9, i32 9>, [[TMP0]] +; CHECK-NEXT: ret <2 x i32> [[CTLZ]] +; + %shl = shl nuw <2 x i32> <i32 8387584, i32 4276440>, %0 + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 false) + ret <2 x i32> %ctlz +} + +define <2 x i32> @vec2_shl_nsw_ctlz_false(<2 x i32>) { +; CHECK-LABEL: define <2 x i32> @vec2_shl_nsw_ctlz_false( +; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) { +; CHECK-NEXT: [[CTLZ:%.*]] = sub <2 x i32> <i32 9, i32 9>, [[TMP0]] +; CHECK-NEXT: ret <2 x i32> [[CTLZ]] +; + %shl = shl nsw <2 x i32> <i32 8387584, i32 4276440>, %0 + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 false) + ret <2 x i32> %ctlz +} + +define <2 x i32> @vec2_shl_nuw_nsw_ctlz_false(<2 x i32>) { +; CHECK-LABEL: define <2 x i32> @vec2_shl_nuw_nsw_ctlz_false( +; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) { +; CHECK-NEXT: [[CTLZ:%.*]] = sub <2 x i32> <i32 9, i32 9>, [[TMP0]] +; CHECK-NEXT: ret <2 x i32> [[CTLZ]] +; + %shl = shl nuw nsw <2 x i32> <i32 8387584, i32 4276440>, %0 + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 false) + ret <2 x i32> %ctlz +} + +define <2 x i32> @vec2_lshr_exact_cttz_false(<2 x i32>) { +; CHECK-LABEL: define <2 x i32> @vec2_lshr_exact_cttz_false( +; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) { +; CHECK-NEXT: [[CTTZ:%.*]] = sub <2 x i32> <i32 10, i32 3>, [[TMP0]] +; CHECK-NEXT: ret <2 x i32> [[CTTZ]] +; + %lshr = lshr exact <2 x i32> <i32 8387584, i32 4276440>, %0 + %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %lshr, i1 false) + ret <2 x i32> %cttz +} + +define <2 x i32> @vec2_shl_cttz_true(<2 x i32>) { +; CHECK-LABEL: define <2 x i32> @vec2_shl_cttz_true( +; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) { +; CHECK-NEXT: [[CTTZ:%.*]] = add <2 x i32> [[TMP0]], <i32 10, i32 3> +; CHECK-NEXT: ret <2 x i32> [[CTTZ]] +; + %shl = shl <2 x i32> <i32 8387584, i32 4276440>, %0 + %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %shl, i1 true) + ret <2 x i32> %cttz +} + +; negative tests: + +define <2 x i32> @vec2_lshr_ctlz_false_neg(<2 x i32>) { +; CHECK-LABEL: define <2 x i32> @vec2_lshr_ctlz_false_neg( +; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) { +; CHECK-NEXT: [[DIV:%.*]] = lshr <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]] +; CHECK-NEXT: [[CTLZ:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[DIV]], i1 false), !range [[RNG0:![0-9]+]] +; CHECK-NEXT: ret <2 x i32> [[CTLZ]] +; + %div = lshr <2 x i32> <i32 8387584, i32 4276440>, %0 + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %div, i1 false) + ret <2 x i32> %ctlz +} + +define <2 x i32> @vec2_shl_ctlz_false_neg(<2 x i32>) { +; CHECK-LABEL: define <2 x i32> @vec2_shl_ctlz_false_neg( +; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) { +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]] +; CHECK-NEXT: [[CTLZ:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[SHL]], i1 false), !range [[RNG1:![0-9]+]] +; CHECK-NEXT: ret <2 x i32> [[CTLZ]] +; + %shl = shl <2 x i32> <i32 8387584, i32 4276440>, %0 + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 false) + ret <2 x i32> %ctlz +} + +define <2 x i32> @vec2_lshr_cttz_false_neg(<2 x i32>) { +; CHECK-LABEL: define <2 x i32> @vec2_lshr_cttz_false_neg( +; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) { +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]] +; CHECK-NEXT: [[CTTZ:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[LSHR]], i1 false), !range [[RNG1]] +; CHECK-NEXT: ret <2 x i32> [[CTTZ]] +; + %lshr = lshr <2 x i32> <i32 8387584, i32 4276440>, %0 + %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %lshr, i1 false) + ret <2 x i32> %cttz +} + +define <2 x i32> @vec2_shl_cttz_false_neg(<2 x i32>) { +; CHECK-LABEL: define <2 x i32> @vec2_shl_cttz_false_neg( +; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) { +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]] +; CHECK-NEXT: [[CTTZ:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[SHL]], i1 false), !range [[RNG2:![0-9]+]] +; CHECK-NEXT: ret <2 x i32> [[CTTZ]] +; + %shl = shl <2 x i32> <i32 8387584, i32 4276440>, %0 + %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %shl, i1 false) + ret <2 x i32> %cttz +} + +define i32 @lshr_ctlz_faslse_neg(i32) { +; CHECK-LABEL: define i32 @lshr_ctlz_faslse_neg( +; CHECK-SAME: i32 [[TMP0:%.*]]) { +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 8387584, [[TMP0]] +; CHECK-NEXT: [[CTLZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LSHR]], i1 false), !range [[RNG0]] +; CHECK-NEXT: ret i32 [[CTLZ]] +; + %lshr = lshr i32 8387584, %0 + %ctlz = call i32 @llvm.ctlz.i32(i32 %lshr, i1 false) + ret i32 %ctlz +} + +define i32 @shl_ctlz_false_neg(i32) { +; CHECK-LABEL: define i32 @shl_ctlz_false_neg( +; CHECK-SAME: i32 [[TMP0:%.*]]) { +; CHECK-NEXT: [[SHL:%.*]] = shl i32 8387584, [[TMP0]] +; CHECK-NEXT: [[CTLZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[SHL]], i1 false), !range [[RNG1]] +; CHECK-NEXT: ret i32 [[CTLZ]] +; + %shl = shl i32 8387584, %0 + %ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 false) + ret i32 %ctlz +} + +define i32 @lshr_cttz_false_neg(i32) { +; CHECK-LABEL: define i32 @lshr_cttz_false_neg( +; CHECK-SAME: i32 [[TMP0:%.*]]) { +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 8387584, [[TMP0]] +; CHECK-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[LSHR]], i1 false), !range [[RNG1]] +; CHECK-NEXT: ret i32 [[CTTZ]] +; + %lshr = lshr i32 8387584, %0 + %cttz = call i32 @llvm.cttz.i32(i32 %lshr, i1 false) + ret i32 %cttz +} + +define i32 @shl_cttz_false_neg(i32) { +; CHECK-LABEL: define i32 @shl_cttz_false_neg( +; CHECK-SAME: i32 [[TMP0:%.*]]) { +; CHECK-NEXT: [[SHL:%.*]] = shl i32 8387584, [[TMP0]] +; CHECK-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[SHL]], i1 false), !range [[RNG3:![0-9]+]] +; CHECK-NEXT: ret i32 [[CTTZ]] +; + %shl = shl i32 8387584, %0 + %cttz = call i32 @llvm.cttz.i32(i32 %shl, i1 false) + ret i32 %cttz +} +;. +; CHECK: [[RNG0]] = !{i32 9, i32 33} +; CHECK: [[RNG1]] = !{i32 0, i32 33} +; CHECK: [[RNG2]] = !{i32 3, i32 33} +; CHECK: [[RNG3]] = !{i32 10, i32 33} +;. _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits