https://github.com/jroelofs updated https://github.com/llvm/llvm-project/pull/80093
>From ed52ee4424459ebc046a625341ad8dbbd38bcbe3 Mon Sep 17 00:00:00 2001 From: Jon Roelofs <jonathan_roel...@apple.com> Date: Tue, 30 Jan 2024 19:13:42 -0800 Subject: [PATCH 1/4] [clang][FMV] Direct-call multi-versioned callees from multi-versioned callers ... when there is a callee with a matching feature set, and no other higher priority callee. This optimization helps the inliner see past the ifunc+resolver to the callee that we know it will always land on. This is a conservative implementation of: https://github.com/llvm/llvm-project/issues/71714 --- clang/lib/CodeGen/CGCall.cpp | 72 +++++ clang/lib/CodeGen/CodeGenModule.cpp | 2 +- .../test/CodeGen/attr-target-mv-direct-call.c | 245 ++++++++++++++++++ 3 files changed, 318 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGen/attr-target-mv-direct-call.c diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 28c211aa631e4..84a04e3ccddd8 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -4966,6 +4966,11 @@ static unsigned getMaxVectorWidth(const llvm::Type *Ty) { return MaxVectorWidth; } +// FIXME: put this somewhere nicer to share +unsigned +TargetMVPriority(const TargetInfo &TI, + const CodeGenFunction::MultiVersionResolverOption &RO); + RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, @@ -5437,6 +5442,73 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &ConcreteCallee = Callee.prepareConcreteCallee(*this); llvm::Value *CalleePtr = ConcreteCallee.getFunctionPointer(); + // If a multi-versioned caller calls a multi-versioned callee, skip the + // resolver when there is a precise match on the feature sets, and no + // possibility of a better match at runtime. + if (const auto *CallerFD = dyn_cast_or_null<FunctionDecl>(CurGD.getDecl())) + if (const auto *CallerTVA = CallerFD->getAttr<TargetVersionAttr>()) + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) + // FIXME: do the same where either the caller or callee are + // target_clones. + if (FD->isTargetMultiVersion()) { + llvm::SmallVector<StringRef, 8> CallerFeats; + CallerTVA->getFeatures(CallerFeats); + MultiVersionResolverOption CallerMVRO(nullptr, "", CallerFeats); + + bool HasHigherPriorityCallee = false; + llvm::Constant *FoundMatchingCallee = nullptr; + getContext().forEachMultiversionedFunctionVersion( + FD, [this, FD, &CallerMVRO, &HasHigherPriorityCallee, + &FoundMatchingCallee](const FunctionDecl *CurFD) { + const auto *CalleeTVA = CurFD->getAttr<TargetVersionAttr>(); + + GlobalDecl CurGD{ + (CurFD->isDefined() ? CurFD->getDefinition() : CurFD)}; + StringRef MangledName = CGM.getMangledName(CurFD); + + llvm::SmallVector<StringRef, 8> CalleeFeats; + CalleeTVA->getFeatures(CalleeFeats); + MultiVersionResolverOption CalleeMVRO(nullptr, "", CalleeFeats); + + const TargetInfo &TI = getTarget(); + + // If there is a higher priority callee, we can't do the + // optimization at all, as it would be a valid choice at + // runtime. + if (TargetMVPriority(TI, CalleeMVRO) > + TargetMVPriority(TI, CallerMVRO)) { + HasHigherPriorityCallee = true; + return; + } + + // FIXME: we could allow a lower-priority match when the + // features are a proper subset. But for now, to keep things + // simpler, we only care about a precise match. + if (TargetMVPriority(TI, CalleeMVRO) < + TargetMVPriority(TI, CallerMVRO)) + return; + + if (llvm::Constant *Func = CGM.GetGlobalValue(MangledName)) { + FoundMatchingCallee = Func; + return; + } + + if (CurFD->isDefined()) { + // FIXME: not sure how to get the address + } else { + const CGFunctionInfo &FI = + getTypes().arrangeGlobalDeclaration(FD); + llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); + FoundMatchingCallee = + CGM.GetAddrOfFunction(CurGD, Ty, /*ForVTable=*/false, + /*DontDefer=*/false, ForDefinition); + } + }); + + if (FoundMatchingCallee && !HasHigherPriorityCallee) + CalleePtr = FoundMatchingCallee; + } + // If we're using inalloca, set up that argument. if (ArgMemory.isValid()) { llvm::Value *Arg = ArgMemory.getPointer(); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 6ec54cc01c923..c334e4a3a40f3 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4092,7 +4092,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, llvm::Function *NewFn); -static unsigned +unsigned TargetMVPriority(const TargetInfo &TI, const CodeGenFunction::MultiVersionResolverOption &RO) { unsigned Priority = 0; diff --git a/clang/test/CodeGen/attr-target-mv-direct-call.c b/clang/test/CodeGen/attr-target-mv-direct-call.c new file mode 100644 index 0000000000000..687fdd1ca3c24 --- /dev/null +++ b/clang/test/CodeGen/attr-target-mv-direct-call.c @@ -0,0 +1,245 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -emit-llvm -o - %s | FileCheck %s + +// Check that we make a direct call from direct_caller._Msimd to +// direct_callee._Msimd when there is no better option. +__attribute__((target_version("simd"))) void direct_callee(void) {} +__attribute__((target_version("default"))) void direct_callee(void) {} +__attribute__((target_version("simd"))) void direct_caller(void) { direct_callee(); } +__attribute__((target_version("default"))) void direct_caller(void) { direct_callee(); } + +// ... and that we go through the ifunc+resolver when there is a better option +// that might be chosen at runtime. +__attribute__((target_version("simd"))) void resolved_callee1(void) {} +__attribute__((target_version("fcma"))) void resolved_callee1(void) {} +__attribute__((target_version("default"))) void resolved_callee1(void) {} +__attribute__((target_version("simd"))) void resolved_caller1(void) { resolved_callee1(); } +__attribute__((target_version("default"))) void resolved_caller1(void) { resolved_callee1(); } + +// FIXME: we could direct call in cases like this: +__attribute__((target_version("fp"))) void resolved_callee2(void) {} +__attribute__((target_version("default"))) void resolved_callee2(void) {} +__attribute__((target_version("simd+fp"))) void resolved_caller2(void) { resolved_callee2(); } +__attribute__((target_version("default"))) void resolved_caller2(void) { resolved_callee2(); } + +void source() { + direct_caller(); + resolved_caller1(); + resolved_caller2(); +} + +//. +// CHECK: @__aarch64_cpu_features = external dso_local global { i64 } +// CHECK: @direct_callee.ifunc = weak_odr ifunc void (), ptr @direct_callee.resolver +// CHECK: @direct_caller.ifunc = weak_odr ifunc void (), ptr @direct_caller.resolver +// CHECK: @resolved_callee1.ifunc = weak_odr ifunc void (), ptr @resolved_callee1.resolver +// CHECK: @resolved_caller1.ifunc = weak_odr ifunc void (), ptr @resolved_caller1.resolver +// CHECK: @resolved_callee2.ifunc = weak_odr ifunc void (), ptr @resolved_callee2.resolver +// CHECK: @resolved_caller2.ifunc = weak_odr ifunc void (), ptr @resolved_caller2.resolver +//. +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@direct_callee._Msimd +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@direct_callee.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 512 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 512 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @direct_callee._Msimd +// CHECK: resolver_else: +// CHECK-NEXT: ret ptr @direct_callee.default +// +// +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@direct_caller._Msimd +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @direct_callee._Msimd() +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@direct_caller.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 512 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 512 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @direct_caller._Msimd +// CHECK: resolver_else: +// CHECK-NEXT: ret ptr @direct_caller.default +// +// +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@resolved_callee1._Msimd +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@resolved_callee1.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 2097152 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 2097152 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @resolved_callee1._Mfcma +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 512 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 512 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @resolved_callee1._Msimd +// CHECK: resolver_else2: +// CHECK-NEXT: ret ptr @resolved_callee1.default +// +// +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@resolved_caller1._Msimd +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @resolved_callee1.ifunc() +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@resolved_caller1.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 512 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 512 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @resolved_caller1._Msimd +// CHECK: resolver_else: +// CHECK-NEXT: ret ptr @resolved_caller1.default +// +// +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@resolved_callee2._Mfp +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@resolved_callee2.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 256 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 256 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @resolved_callee2._Mfp +// CHECK: resolver_else: +// CHECK-NEXT: ret ptr @resolved_callee2.default +// +// +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@resolved_caller2._MfpMsimd +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @resolved_callee2.ifunc() +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@resolved_caller2.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 768 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 768 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @resolved_caller2._MfpMsimd +// CHECK: resolver_else: +// CHECK-NEXT: ret ptr @resolved_caller2.default +// +// +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@source +// CHECK-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @direct_caller.ifunc() +// CHECK-NEXT: call void @resolved_caller1.ifunc() +// CHECK-NEXT: call void @resolved_caller2.ifunc() +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@direct_callee.default +// CHECK-SAME: () #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@direct_caller.default +// CHECK-SAME: () #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @direct_callee.ifunc() +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@resolved_callee1._Mfcma +// CHECK-SAME: () #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@resolved_callee1.default +// CHECK-SAME: () #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@resolved_caller1.default +// CHECK-SAME: () #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @resolved_callee1.ifunc() +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@resolved_callee2.default +// CHECK-SAME: () #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@resolved_caller2.default +// CHECK-SAME: () #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @resolved_callee2.ifunc() +// CHECK-NEXT: ret void +// +//. +// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" } +// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+complxnum,+fp-armv8,+neon" } +//. +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. >From 712643868509f64ee820688579b0278b8783456c Mon Sep 17 00:00:00 2001 From: Jon Roelofs <jonathan_roel...@apple.com> Date: Wed, 31 Jan 2024 08:53:36 -0800 Subject: [PATCH 2/4] don't do it at -O0 --- clang/lib/CodeGen/CGCall.cpp | 127 ++--- .../test/CodeGen/attr-target-mv-direct-call.c | 453 +++++++++--------- 2 files changed, 284 insertions(+), 296 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 84a04e3ccddd8..fe69b0ab133ac 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5446,68 +5446,71 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // resolver when there is a precise match on the feature sets, and no // possibility of a better match at runtime. if (const auto *CallerFD = dyn_cast_or_null<FunctionDecl>(CurGD.getDecl())) - if (const auto *CallerTVA = CallerFD->getAttr<TargetVersionAttr>()) - if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) - // FIXME: do the same where either the caller or callee are - // target_clones. - if (FD->isTargetMultiVersion()) { - llvm::SmallVector<StringRef, 8> CallerFeats; - CallerTVA->getFeatures(CallerFeats); - MultiVersionResolverOption CallerMVRO(nullptr, "", CallerFeats); - - bool HasHigherPriorityCallee = false; - llvm::Constant *FoundMatchingCallee = nullptr; - getContext().forEachMultiversionedFunctionVersion( - FD, [this, FD, &CallerMVRO, &HasHigherPriorityCallee, - &FoundMatchingCallee](const FunctionDecl *CurFD) { - const auto *CalleeTVA = CurFD->getAttr<TargetVersionAttr>(); - - GlobalDecl CurGD{ - (CurFD->isDefined() ? CurFD->getDefinition() : CurFD)}; - StringRef MangledName = CGM.getMangledName(CurFD); - - llvm::SmallVector<StringRef, 8> CalleeFeats; - CalleeTVA->getFeatures(CalleeFeats); - MultiVersionResolverOption CalleeMVRO(nullptr, "", CalleeFeats); - - const TargetInfo &TI = getTarget(); - - // If there is a higher priority callee, we can't do the - // optimization at all, as it would be a valid choice at - // runtime. - if (TargetMVPriority(TI, CalleeMVRO) > - TargetMVPriority(TI, CallerMVRO)) { - HasHigherPriorityCallee = true; - return; - } - - // FIXME: we could allow a lower-priority match when the - // features are a proper subset. But for now, to keep things - // simpler, we only care about a precise match. - if (TargetMVPriority(TI, CalleeMVRO) < - TargetMVPriority(TI, CallerMVRO)) - return; - - if (llvm::Constant *Func = CGM.GetGlobalValue(MangledName)) { - FoundMatchingCallee = Func; - return; - } - - if (CurFD->isDefined()) { - // FIXME: not sure how to get the address - } else { - const CGFunctionInfo &FI = - getTypes().arrangeGlobalDeclaration(FD); - llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); - FoundMatchingCallee = - CGM.GetAddrOfFunction(CurGD, Ty, /*ForVTable=*/false, - /*DontDefer=*/false, ForDefinition); - } - }); - - if (FoundMatchingCallee && !HasHigherPriorityCallee) - CalleePtr = FoundMatchingCallee; - } + if (CGM.getCodeGenOpts().OptimizationLevel > 0 && + !CallerFD->hasAttr<OptimizeNoneAttr>()) + if (const auto *CallerTVA = CallerFD->getAttr<TargetVersionAttr>()) + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) + // FIXME: do the same where either the caller or callee are + // target_clones. + if (FD->isTargetMultiVersion()) { + llvm::SmallVector<StringRef, 8> CallerFeats; + CallerTVA->getFeatures(CallerFeats); + MultiVersionResolverOption CallerMVRO(nullptr, "", CallerFeats); + + bool HasHigherPriorityCallee = false; + llvm::Constant *FoundMatchingCallee = nullptr; + getContext().forEachMultiversionedFunctionVersion( + FD, [this, FD, &CallerMVRO, &HasHigherPriorityCallee, + &FoundMatchingCallee](const FunctionDecl *CurFD) { + const auto *CalleeTVA = CurFD->getAttr<TargetVersionAttr>(); + + GlobalDecl CurGD{ + (CurFD->isDefined() ? CurFD->getDefinition() : CurFD)}; + StringRef MangledName = CGM.getMangledName(CurFD); + + llvm::SmallVector<StringRef, 8> CalleeFeats; + CalleeTVA->getFeatures(CalleeFeats); + MultiVersionResolverOption CalleeMVRO(nullptr, "", + CalleeFeats); + + const TargetInfo &TI = getTarget(); + + // If there is a higher priority callee, we can't do the + // optimization at all, as it would be a valid choice at + // runtime. + if (TargetMVPriority(TI, CalleeMVRO) > + TargetMVPriority(TI, CallerMVRO)) { + HasHigherPriorityCallee = true; + return; + } + + // FIXME: we could allow a lower-priority match when the + // features are a proper subset. But for now, to keep things + // simpler, we only care about a precise match. + if (TargetMVPriority(TI, CalleeMVRO) < + TargetMVPriority(TI, CallerMVRO)) + return; + + if (llvm::Constant *Func = CGM.GetGlobalValue(MangledName)) { + FoundMatchingCallee = Func; + return; + } + + if (CurFD->isDefined()) { + // FIXME: not sure how to get the address + } else { + const CGFunctionInfo &FI = + getTypes().arrangeGlobalDeclaration(FD); + llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); + FoundMatchingCallee = CGM.GetAddrOfFunction( + CurGD, Ty, /*ForVTable=*/false, + /*DontDefer=*/false, ForDefinition); + } + }); + + if (FoundMatchingCallee && !HasHigherPriorityCallee) + CalleePtr = FoundMatchingCallee; + } // If we're using inalloca, set up that argument. if (ArgMemory.isValid()) { diff --git a/clang/test/CodeGen/attr-target-mv-direct-call.c b/clang/test/CodeGen/attr-target-mv-direct-call.c index 687fdd1ca3c24..9a465dba52d25 100644 --- a/clang/test/CodeGen/attr-target-mv-direct-call.c +++ b/clang/test/CodeGen/attr-target-mv-direct-call.c @@ -1,245 +1,230 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -emit-llvm -o - %s | FileCheck %s +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "call i32" --include-generated-funcs +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -O0 -S -emit-llvm -disable-llvm-optzns -o - %s | FileCheck %s --check-prefixes=CHECK,O0 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -O2 -S -emit-llvm -disable-llvm-optzns -o - %s | FileCheck %s --check-prefixes=CHECK,O2 // Check that we make a direct call from direct_caller._Msimd to // direct_callee._Msimd when there is no better option. -__attribute__((target_version("simd"))) void direct_callee(void) {} -__attribute__((target_version("default"))) void direct_callee(void) {} -__attribute__((target_version("simd"))) void direct_caller(void) { direct_callee(); } -__attribute__((target_version("default"))) void direct_caller(void) { direct_callee(); } +__attribute__((target_version("simd"))) int direct_callee(void) { return 1; } +__attribute__((target_version("default"))) int direct_callee(void) { return 2; } +__attribute__((target_version("simd"))) int direct_caller(void) { return direct_callee(); } +__attribute__((target_version("default"))) int direct_caller(void) { return direct_callee(); } + +__attribute__((target_version("simd"), optnone)) int optnone_caller(void) { return direct_callee(); } +__attribute__((target_version("default"), optnone)) int optnone_caller(void) { return direct_callee(); } // ... and that we go through the ifunc+resolver when there is a better option // that might be chosen at runtime. -__attribute__((target_version("simd"))) void resolved_callee1(void) {} -__attribute__((target_version("fcma"))) void resolved_callee1(void) {} -__attribute__((target_version("default"))) void resolved_callee1(void) {} -__attribute__((target_version("simd"))) void resolved_caller1(void) { resolved_callee1(); } -__attribute__((target_version("default"))) void resolved_caller1(void) { resolved_callee1(); } +__attribute__((target_version("simd"))) int resolved_callee1(void) { return 3; } +__attribute__((target_version("fcma"))) int resolved_callee1(void) { return 4; } +__attribute__((target_version("default"))) int resolved_callee1(void) { return 5; } +__attribute__((target_version("simd"))) int resolved_caller1(void) { return resolved_callee1(); } +__attribute__((target_version("default"))) int resolved_caller1(void) { return resolved_callee1(); } // FIXME: we could direct call in cases like this: -__attribute__((target_version("fp"))) void resolved_callee2(void) {} -__attribute__((target_version("default"))) void resolved_callee2(void) {} -__attribute__((target_version("simd+fp"))) void resolved_caller2(void) { resolved_callee2(); } -__attribute__((target_version("default"))) void resolved_caller2(void) { resolved_callee2(); } +__attribute__((target_version("fp"))) int resolved_callee2(void) { return 6; } +__attribute__((target_version("default"))) int resolved_callee2(void) { return 7; } +__attribute__((target_version("simd+fp"))) int resolved_caller2(void) { return resolved_callee2(); } +__attribute__((target_version("default"))) int resolved_caller2(void) { return resolved_callee2(); } -void source() { - direct_caller(); - resolved_caller1(); - resolved_caller2(); +int source() { + return direct_caller() + optnone_caller() + resolved_caller1() + resolved_caller2(); } -//. -// CHECK: @__aarch64_cpu_features = external dso_local global { i64 } -// CHECK: @direct_callee.ifunc = weak_odr ifunc void (), ptr @direct_callee.resolver -// CHECK: @direct_caller.ifunc = weak_odr ifunc void (), ptr @direct_caller.resolver -// CHECK: @resolved_callee1.ifunc = weak_odr ifunc void (), ptr @resolved_callee1.resolver -// CHECK: @resolved_caller1.ifunc = weak_odr ifunc void (), ptr @resolved_caller1.resolver -// CHECK: @resolved_callee2.ifunc = weak_odr ifunc void (), ptr @resolved_callee2.resolver -// CHECK: @resolved_caller2.ifunc = weak_odr ifunc void (), ptr @resolved_caller2.resolver -//. -// CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@direct_callee._Msimd -// CHECK-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: ret void -// -// -// CHECK-LABEL: define {{[^@]+}}@direct_callee.resolver() comdat { -// CHECK-NEXT: resolver_entry: -// CHECK-NEXT: call void @__init_cpu_features_resolver() -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 512 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 512 -// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] -// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] -// CHECK: resolver_return: -// CHECK-NEXT: ret ptr @direct_callee._Msimd -// CHECK: resolver_else: -// CHECK-NEXT: ret ptr @direct_callee.default -// -// -// CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@direct_caller._Msimd -// CHECK-SAME: () #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @direct_callee._Msimd() -// CHECK-NEXT: ret void -// -// -// CHECK-LABEL: define {{[^@]+}}@direct_caller.resolver() comdat { -// CHECK-NEXT: resolver_entry: -// CHECK-NEXT: call void @__init_cpu_features_resolver() -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 512 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 512 -// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] -// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] -// CHECK: resolver_return: -// CHECK-NEXT: ret ptr @direct_caller._Msimd -// CHECK: resolver_else: -// CHECK-NEXT: ret ptr @direct_caller.default -// -// -// CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@resolved_callee1._Msimd -// CHECK-SAME: () #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: ret void -// -// -// CHECK-LABEL: define {{[^@]+}}@resolved_callee1.resolver() comdat { -// CHECK-NEXT: resolver_entry: -// CHECK-NEXT: call void @__init_cpu_features_resolver() -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 2097152 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 2097152 -// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] -// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] -// CHECK: resolver_return: -// CHECK-NEXT: ret ptr @resolved_callee1._Mfcma -// CHECK: resolver_else: -// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 512 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 512 -// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] -// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] -// CHECK: resolver_return1: -// CHECK-NEXT: ret ptr @resolved_callee1._Msimd -// CHECK: resolver_else2: -// CHECK-NEXT: ret ptr @resolved_callee1.default -// -// -// CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@resolved_caller1._Msimd -// CHECK-SAME: () #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @resolved_callee1.ifunc() -// CHECK-NEXT: ret void -// -// -// CHECK-LABEL: define {{[^@]+}}@resolved_caller1.resolver() comdat { -// CHECK-NEXT: resolver_entry: -// CHECK-NEXT: call void @__init_cpu_features_resolver() -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 512 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 512 -// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] -// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] -// CHECK: resolver_return: -// CHECK-NEXT: ret ptr @resolved_caller1._Msimd -// CHECK: resolver_else: -// CHECK-NEXT: ret ptr @resolved_caller1.default -// -// -// CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@resolved_callee2._Mfp -// CHECK-SAME: () #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: ret void -// -// -// CHECK-LABEL: define {{[^@]+}}@resolved_callee2.resolver() comdat { -// CHECK-NEXT: resolver_entry: -// CHECK-NEXT: call void @__init_cpu_features_resolver() -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 256 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 256 -// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] -// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] -// CHECK: resolver_return: -// CHECK-NEXT: ret ptr @resolved_callee2._Mfp -// CHECK: resolver_else: -// CHECK-NEXT: ret ptr @resolved_callee2.default -// -// -// CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@resolved_caller2._MfpMsimd -// CHECK-SAME: () #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @resolved_callee2.ifunc() -// CHECK-NEXT: ret void -// -// -// CHECK-LABEL: define {{[^@]+}}@resolved_caller2.resolver() comdat { -// CHECK-NEXT: resolver_entry: -// CHECK-NEXT: call void @__init_cpu_features_resolver() -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 768 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 768 -// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] -// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] -// CHECK: resolver_return: -// CHECK-NEXT: ret ptr @resolved_caller2._MfpMsimd -// CHECK: resolver_else: -// CHECK-NEXT: ret ptr @resolved_caller2.default -// -// -// CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@source -// CHECK-SAME: () #[[ATTR1:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @direct_caller.ifunc() -// CHECK-NEXT: call void @resolved_caller1.ifunc() -// CHECK-NEXT: call void @resolved_caller2.ifunc() -// CHECK-NEXT: ret void -// -// -// CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@direct_callee.default -// CHECK-SAME: () #[[ATTR1]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: ret void -// -// -// CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@direct_caller.default -// CHECK-SAME: () #[[ATTR1]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @direct_callee.ifunc() -// CHECK-NEXT: ret void -// -// -// CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@resolved_callee1._Mfcma -// CHECK-SAME: () #[[ATTR2:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: ret void -// -// -// CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@resolved_callee1.default -// CHECK-SAME: () #[[ATTR1]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: ret void -// -// -// CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@resolved_caller1.default -// CHECK-SAME: () #[[ATTR1]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @resolved_callee1.ifunc() -// CHECK-NEXT: ret void -// -// -// CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@resolved_callee2.default -// CHECK-SAME: () #[[ATTR1]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: ret void -// -// -// CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@resolved_caller2.default -// CHECK-SAME: () #[[ATTR1]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @resolved_callee2.ifunc() -// CHECK-NEXT: ret void -// -//. -// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" } -// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+complxnum,+fp-armv8,+neon" } -//. -// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -//. +// CHECK-LABEL: @direct_callee._Msimd( +// +// CHECK-LABEL: @direct_callee.resolver( +// +// +// CHECK-LABEL: @direct_caller.resolver( +// +// CHECK-LABEL: @optnone_caller._Msimd( +// CHECK: [[CALL:%.*]] = call i32 @direct_callee.ifunc() +// +// +// CHECK-LABEL: @optnone_caller.resolver( +// +// CHECK-LABEL: @resolved_callee1._Msimd( +// +// CHECK-LABEL: @resolved_callee1.resolver( +// +// CHECK-LABEL: @resolved_caller1._Msimd( +// CHECK: [[CALL:%.*]] = call i32 @resolved_callee1.ifunc() +// +// +// CHECK-LABEL: @resolved_caller1.resolver( +// +// CHECK-LABEL: @resolved_callee2._Mfp( +// +// CHECK-LABEL: @resolved_callee2.resolver( +// +// CHECK-LABEL: @resolved_caller2._MfpMsimd( +// CHECK: [[CALL:%.*]] = call i32 @resolved_callee2.ifunc() +// +// +// CHECK-LABEL: @resolved_caller2.resolver( +// +// CHECK-LABEL: @source( +// CHECK: [[CALL:%.*]] = call i32 @direct_caller.ifunc() +// CHECK: [[CALL1:%.*]] = call i32 @optnone_caller.ifunc() +// CHECK: [[CALL2:%.*]] = call i32 @resolved_caller1.ifunc() +// CHECK: [[CALL4:%.*]] = call i32 @resolved_caller2.ifunc() +// +// +// CHECK-LABEL: @direct_callee.default( +// +// CHECK-LABEL: @direct_caller.default( +// CHECK: [[CALL:%.*]] = call i32 @direct_callee.ifunc() +// +// +// CHECK-LABEL: @optnone_caller.default( +// CHECK: [[CALL:%.*]] = call i32 @direct_callee.ifunc() +// +// +// CHECK-LABEL: @resolved_callee1._Mfcma( +// +// CHECK-LABEL: @resolved_callee1.default( +// +// CHECK-LABEL: @resolved_caller1.default( +// CHECK: [[CALL:%.*]] = call i32 @resolved_callee1.ifunc() +// +// +// CHECK-LABEL: @resolved_callee2.default( +// +// CHECK-LABEL: @resolved_caller2.default( +// CHECK: [[CALL:%.*]] = call i32 @resolved_callee2.ifunc() +// +// +// O0-LABEL: @direct_callee._Msimd( +// +// O0-LABEL: @direct_callee.resolver( +// +// O0-LABEL: @direct_caller._Msimd( +// O0: [[CALL:%.*]] = call i32 @direct_callee.ifunc() +// +// +// O0-LABEL: @direct_caller.resolver( +// +// O0-LABEL: @optnone_caller._Msimd( +// O0: [[CALL:%.*]] = call i32 @direct_callee.ifunc() +// +// +// O0-LABEL: @optnone_caller.resolver( +// +// O0-LABEL: @resolved_callee1._Msimd( +// +// O0-LABEL: @resolved_callee1.resolver( +// +// O0-LABEL: @resolved_caller1._Msimd( +// O0: [[CALL:%.*]] = call i32 @resolved_callee1.ifunc() +// +// +// O0-LABEL: @resolved_caller1.resolver( +// +// O0-LABEL: @resolved_callee2._Mfp( +// +// O0-LABEL: @resolved_callee2.resolver( +// +// O0-LABEL: @resolved_caller2._MfpMsimd( +// O0: [[CALL:%.*]] = call i32 @resolved_callee2.ifunc() +// +// +// O0-LABEL: @resolved_caller2.resolver( +// +// O0-LABEL: @source( +// O0: [[CALL:%.*]] = call i32 @direct_caller.ifunc() +// O0: [[CALL1:%.*]] = call i32 @optnone_caller.ifunc() +// O0: [[CALL2:%.*]] = call i32 @resolved_caller1.ifunc() +// O0: [[CALL4:%.*]] = call i32 @resolved_caller2.ifunc() +// +// +// O0-LABEL: @direct_callee.default( +// +// O0-LABEL: @direct_caller.default( +// O0: [[CALL:%.*]] = call i32 @direct_callee.ifunc() +// +// +// O0-LABEL: @optnone_caller.default( +// O0: [[CALL:%.*]] = call i32 @direct_callee.ifunc() +// +// +// O0-LABEL: @resolved_callee1._Mfcma( +// +// O0-LABEL: @resolved_callee1.default( +// +// O0-LABEL: @resolved_caller1.default( +// O0: [[CALL:%.*]] = call i32 @resolved_callee1.ifunc() +// +// +// O0-LABEL: @resolved_callee2.default( +// +// O0-LABEL: @resolved_caller2.default( +// O0: [[CALL:%.*]] = call i32 @resolved_callee2.ifunc() +// +// +// +// O2-LABEL: @direct_callee._Msimd( +// +// O2-LABEL: @direct_callee.resolver( +// +// O2-LABEL: @direct_caller._Msimd( +// O2: [[CALL:%.*]] = call i32 @direct_callee._Msimd() +// +// +// O2-LABEL: @direct_caller.resolver( +// +// O2-LABEL: @optnone_caller._Msimd( +// O2: [[CALL:%.*]] = call i32 @direct_callee.ifunc() +// +// +// O2-LABEL: @optnone_caller.resolver( +// +// O2-LABEL: @resolved_callee1._Msimd( +// +// O2-LABEL: @resolved_callee1.resolver( +// +// O2-LABEL: @resolved_caller1._Msimd( +// O2: [[CALL:%.*]] = call i32 @resolved_callee1.ifunc() +// +// +// O2-LABEL: @resolved_caller1.resolver( +// +// O2-LABEL: @resolved_callee2._Mfp( +// +// O2-LABEL: @resolved_callee2.resolver( +// +// O2-LABEL: @resolved_caller2._MfpMsimd( +// O2: [[CALL:%.*]] = call i32 @resolved_callee2.ifunc() +// +// +// O2-LABEL: @resolved_caller2.resolver( +// +// O2-LABEL: @source( +// O2: [[CALL:%.*]] = call i32 @direct_caller.ifunc() +// O2: [[CALL1:%.*]] = call i32 @optnone_caller.ifunc() +// O2: [[CALL2:%.*]] = call i32 @resolved_caller1.ifunc() +// O2: [[CALL4:%.*]] = call i32 @resolved_caller2.ifunc() +// +// +// O2-LABEL: @direct_callee.default( +// +// O2-LABEL: @direct_caller.default( +// O2: [[CALL:%.*]] = call i32 @direct_callee.ifunc() +// +// +// O2-LABEL: @optnone_caller.default( +// O2: [[CALL:%.*]] = call i32 @direct_callee.ifunc() +// +// +// O2-LABEL: @resolved_callee1._Mfcma( +// +// O2-LABEL: @resolved_callee1.default( +// +// O2-LABEL: @resolved_caller1.default( +// O2: [[CALL:%.*]] = call i32 @resolved_callee1.ifunc() +// +// +// O2-LABEL: @resolved_callee2.default( +// +// O2-LABEL: @resolved_caller2.default( +// O2: [[CALL:%.*]] = call i32 @resolved_callee2.ifunc() +// >From cadeb37acd2786f29984645420e8a715e8ccfe8b Mon Sep 17 00:00:00 2001 From: Jon Roelofs <jonathan_roel...@apple.com> Date: Wed, 31 Jan 2024 08:55:43 -0800 Subject: [PATCH 3/4] only care about caller => callee calls in the test --- clang/test/CodeGen/attr-target-mv-direct-call.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/clang/test/CodeGen/attr-target-mv-direct-call.c b/clang/test/CodeGen/attr-target-mv-direct-call.c index 9a465dba52d25..88b4568d0c084 100644 --- a/clang/test/CodeGen/attr-target-mv-direct-call.c +++ b/clang/test/CodeGen/attr-target-mv-direct-call.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "call i32" --include-generated-funcs +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "call i32.*callee" --include-generated-funcs // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -O0 -S -emit-llvm -disable-llvm-optzns -o - %s | FileCheck %s --check-prefixes=CHECK,O0 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -O2 -S -emit-llvm -disable-llvm-optzns -o - %s | FileCheck %s --check-prefixes=CHECK,O2 @@ -64,11 +64,6 @@ int source() { // CHECK-LABEL: @resolved_caller2.resolver( // // CHECK-LABEL: @source( -// CHECK: [[CALL:%.*]] = call i32 @direct_caller.ifunc() -// CHECK: [[CALL1:%.*]] = call i32 @optnone_caller.ifunc() -// CHECK: [[CALL2:%.*]] = call i32 @resolved_caller1.ifunc() -// CHECK: [[CALL4:%.*]] = call i32 @resolved_caller2.ifunc() -// // // CHECK-LABEL: @direct_callee.default( // @@ -131,11 +126,6 @@ int source() { // O0-LABEL: @resolved_caller2.resolver( // // O0-LABEL: @source( -// O0: [[CALL:%.*]] = call i32 @direct_caller.ifunc() -// O0: [[CALL1:%.*]] = call i32 @optnone_caller.ifunc() -// O0: [[CALL2:%.*]] = call i32 @resolved_caller1.ifunc() -// O0: [[CALL4:%.*]] = call i32 @resolved_caller2.ifunc() -// // // O0-LABEL: @direct_callee.default( // @@ -199,11 +189,6 @@ int source() { // O2-LABEL: @resolved_caller2.resolver( // // O2-LABEL: @source( -// O2: [[CALL:%.*]] = call i32 @direct_caller.ifunc() -// O2: [[CALL1:%.*]] = call i32 @optnone_caller.ifunc() -// O2: [[CALL2:%.*]] = call i32 @resolved_caller1.ifunc() -// O2: [[CALL4:%.*]] = call i32 @resolved_caller2.ifunc() -// // // O2-LABEL: @direct_callee.default( // >From 4129daf8de38949970bb00b226a2d8df1662a0db Mon Sep 17 00:00:00 2001 From: Jon Roelofs <jonathan_roel...@apple.com> Date: Wed, 31 Jan 2024 09:18:50 -0800 Subject: [PATCH 4/4] address a fixme: move priority calculation to a member function --- clang/lib/CodeGen/CGCall.cpp | 11 ++--------- clang/lib/CodeGen/CodeGenFunction.cpp | 18 ++++++++++++++++++ clang/lib/CodeGen/CodeGenFunction.h | 2 ++ clang/lib/CodeGen/CodeGenModule.cpp | 21 +-------------------- 4 files changed, 23 insertions(+), 29 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index fe69b0ab133ac..3f6e171c67345 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -4966,11 +4966,6 @@ static unsigned getMaxVectorWidth(const llvm::Type *Ty) { return MaxVectorWidth; } -// FIXME: put this somewhere nicer to share -unsigned -TargetMVPriority(const TargetInfo &TI, - const CodeGenFunction::MultiVersionResolverOption &RO); - RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, @@ -5478,8 +5473,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // If there is a higher priority callee, we can't do the // optimization at all, as it would be a valid choice at // runtime. - if (TargetMVPriority(TI, CalleeMVRO) > - TargetMVPriority(TI, CallerMVRO)) { + if (CalleeMVRO.priority(TI) > CallerMVRO.priority(TI)) { HasHigherPriorityCallee = true; return; } @@ -5487,8 +5481,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // FIXME: we could allow a lower-priority match when the // features are a proper subset. But for now, to keep things // simpler, we only care about a precise match. - if (TargetMVPriority(TI, CalleeMVRO) < - TargetMVPriority(TI, CallerMVRO)) + if (CalleeMVRO.priority(TI) < CallerMVRO.priority(TI)) return; if (llvm::Constant *Func = CGM.GetGlobalValue(MangledName)) { diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 1ad905078d349..f2c93b5e5398b 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -2697,6 +2697,24 @@ void CodeGenFunction::EmitSanitizerStatReport(llvm::SanitizerStatKind SSK) { CGM.getSanStats().create(IRB, SSK); } +unsigned CodeGenFunction::MultiVersionResolverOption::priority( + const TargetInfo &TI) const { + unsigned Priority = 0; + unsigned NumFeatures = 0; + for (StringRef Feat : Conditions.Features) { + Priority = std::max(Priority, TI.multiVersionSortPriority(Feat)); + NumFeatures++; + } + + if (!Conditions.Architecture.empty()) + Priority = std::max(Priority, + TI.multiVersionSortPriority(Conditions.Architecture)); + + Priority += TI.multiVersionFeatureCost() * NumFeatures; + + return Priority; +} + void CodeGenFunction::EmitKCFIOperandBundle( const CGCallee &Callee, SmallVectorImpl<llvm::OperandBundleDef> &Bundles) { const FunctionProtoType *FP = diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 143ad64e8816b..525852437dbb8 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4965,6 +4965,8 @@ class CodeGenFunction : public CodeGenTypeCache { MultiVersionResolverOption(llvm::Function *F, StringRef Arch, ArrayRef<StringRef> Feats) : Function(F), Conditions(Arch, Feats) {} + + unsigned priority(const TargetInfo &TI) const; }; // Emits the body of a multiversion function's resolver. Assumes that the diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index c334e4a3a40f3..d6abd4cc9454d 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4092,25 +4092,6 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, llvm::Function *NewFn); -unsigned -TargetMVPriority(const TargetInfo &TI, - const CodeGenFunction::MultiVersionResolverOption &RO) { - unsigned Priority = 0; - unsigned NumFeatures = 0; - for (StringRef Feat : RO.Conditions.Features) { - Priority = std::max(Priority, TI.multiVersionSortPriority(Feat)); - NumFeatures++; - } - - if (!RO.Conditions.Architecture.empty()) - Priority = std::max( - Priority, TI.multiVersionSortPriority(RO.Conditions.Architecture)); - - Priority += TI.multiVersionFeatureCost() * NumFeatures; - - return Priority; -} - // Multiversion functions should be at most 'WeakODRLinkage' so that a different // TU can forward declare the function without causing problems. Particularly // in the cases of CPUDispatch, this causes issues. This also makes sure we @@ -4244,7 +4225,7 @@ void CodeGenModule::emitMultiVersionFunctions() { llvm::stable_sort( Options, [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS, const CodeGenFunction::MultiVersionResolverOption &RHS) { - return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS); + return LHS.priority(TI) > RHS.priority(TI); }); CodeGenFunction CGF(*this); CGF.EmitMultiVersionResolver(ResolverFunc, Options); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits