[llvm-branch-commits] [BOLT] Function matching with function calls as anchors (PR #96596)
https://github.com/shawbyoung created https://github.com/llvm/llvm-project/pull/96596 Test Plan: tbd ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95884 >From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:42:00 -0700 Subject: [PATCH 1/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 73 -- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 66cabc236f4b2..c9f6d88f0b13a 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { // Uses name similarity to match functions that were not matched by name. uint64_t MatchedWithDemangledName = 0; - if (opts::NameSimilarityFunctionMatchingThreshold > 0) { - -std::unordered_map NameToBinaryFunction; -NameToBinaryFunction.reserve(BC.getBinaryFunctions().size()); -for (auto &[_, BF] : BC.getBinaryFunctions()) { + if (opts::NameSimilarityFunctionMatchingThreshold > 0) { +auto DemangleName = [&](const char* String) { int Status = 0; - char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(), + char *DemangledName = abi::__cxa_demangle(String, nullptr, nullptr, ); - if (Status == 0) -NameToBinaryFunction[std::string(DemangledName)] = + return Status == 0 ? new std::string(DemangledName) : nullptr; +}; + +auto DeriveNameSpace = [&](std::string DemangledName) { + size_t LParen = std::string(DemangledName).find("("); + std::string FunctionName = std::string(DemangledName).substr(0, LParen); + size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::"); + return ScopeResolutionOperator == std::string::npos ? std::string("") : std::string(DemangledName).substr(0, ScopeResolutionOperator); +}; + +std::unordered_map> NamespaceToBFs; +NamespaceToBFs.reserve(BC.getBinaryFunctions().size()); + +for (BinaryFunction *BF : BC.getAllBinaryFunctions()) { + std::string* DemangledName = DemangleName(BF->getOneName().str().c_str()); + if (!DemangledName) +continue; + std::string Namespace = DeriveNameSpace(*DemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) +NamespaceToBFs[Namespace] = {BF}; + else +It->second.push_back(BF); } for (auto YamlBF : YamlBP.Functions) { if (YamlBF.Used) continue; - int Status = 0; - char *DemangledName = - abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, ); - if (Status != 0) + std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str()); + if (!YamlBFDemangledName) continue; - auto It = NameToBinaryFunction.find(DemangledName); - if (It == NameToBinaryFunction.end()) + std::string Namespace = DeriveNameSpace(*YamlBFDemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) continue; - BinaryFunction *BF = It->second; - matchProfileToFunction(YamlBF, *BF); - ++MatchedWithDemangledName; + std::vector BFs = It->second; + + unsigned MinEditDistance = UINT_MAX; + BinaryFunction *ClosestNameBF = nullptr; + + for (BinaryFunction *BF : BFs) { +if (ProfiledFunctions.count(BF)) + continue; +std::string *BFDemangledName = DemangleName(BF->getOneName().str().c_str()); +if (!BFDemangledName) + continue; +unsigned BFEditDistance = StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName); +if (BFEditDistance < MinEditDistance) { + MinEditDistance = BFEditDistance; + ClosestNameBF = BF; +} + } + + if (ClosestNameBF && +MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) { +matchProfileToFunction(YamlBF, *ClosestNameBF); +++MatchedWithDemangledName; + } } } + outs() << MatchedWithDemangledName << ": functions matched by name similarity\n"; + for (yaml::bolt::BinaryFunctionProfile : YamlBP.Functions) if (!YamlBF.Used && opts::Verbosity >= 1) errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name >From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:45:27 -0700 Subject: [PATCH 2/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index c9f6d88f0b13a..cf4a5393df8f4 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -491,8 +491,6 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95884 >From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:42:00 -0700 Subject: [PATCH 1/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 73 -- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 66cabc236f4b2..c9f6d88f0b13a 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { // Uses name similarity to match functions that were not matched by name. uint64_t MatchedWithDemangledName = 0; - if (opts::NameSimilarityFunctionMatchingThreshold > 0) { - -std::unordered_map NameToBinaryFunction; -NameToBinaryFunction.reserve(BC.getBinaryFunctions().size()); -for (auto &[_, BF] : BC.getBinaryFunctions()) { + if (opts::NameSimilarityFunctionMatchingThreshold > 0) { +auto DemangleName = [&](const char* String) { int Status = 0; - char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(), + char *DemangledName = abi::__cxa_demangle(String, nullptr, nullptr, ); - if (Status == 0) -NameToBinaryFunction[std::string(DemangledName)] = + return Status == 0 ? new std::string(DemangledName) : nullptr; +}; + +auto DeriveNameSpace = [&](std::string DemangledName) { + size_t LParen = std::string(DemangledName).find("("); + std::string FunctionName = std::string(DemangledName).substr(0, LParen); + size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::"); + return ScopeResolutionOperator == std::string::npos ? std::string("") : std::string(DemangledName).substr(0, ScopeResolutionOperator); +}; + +std::unordered_map> NamespaceToBFs; +NamespaceToBFs.reserve(BC.getBinaryFunctions().size()); + +for (BinaryFunction *BF : BC.getAllBinaryFunctions()) { + std::string* DemangledName = DemangleName(BF->getOneName().str().c_str()); + if (!DemangledName) +continue; + std::string Namespace = DeriveNameSpace(*DemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) +NamespaceToBFs[Namespace] = {BF}; + else +It->second.push_back(BF); } for (auto YamlBF : YamlBP.Functions) { if (YamlBF.Used) continue; - int Status = 0; - char *DemangledName = - abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, ); - if (Status != 0) + std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str()); + if (!YamlBFDemangledName) continue; - auto It = NameToBinaryFunction.find(DemangledName); - if (It == NameToBinaryFunction.end()) + std::string Namespace = DeriveNameSpace(*YamlBFDemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) continue; - BinaryFunction *BF = It->second; - matchProfileToFunction(YamlBF, *BF); - ++MatchedWithDemangledName; + std::vector BFs = It->second; + + unsigned MinEditDistance = UINT_MAX; + BinaryFunction *ClosestNameBF = nullptr; + + for (BinaryFunction *BF : BFs) { +if (ProfiledFunctions.count(BF)) + continue; +std::string *BFDemangledName = DemangleName(BF->getOneName().str().c_str()); +if (!BFDemangledName) + continue; +unsigned BFEditDistance = StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName); +if (BFEditDistance < MinEditDistance) { + MinEditDistance = BFEditDistance; + ClosestNameBF = BF; +} + } + + if (ClosestNameBF && +MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) { +matchProfileToFunction(YamlBF, *ClosestNameBF); +++MatchedWithDemangledName; + } } } + outs() << MatchedWithDemangledName << ": functions matched by name similarity\n"; + for (yaml::bolt::BinaryFunctionProfile : YamlBP.Functions) if (!YamlBF.Used && opts::Verbosity >= 1) errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name >From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:45:27 -0700 Subject: [PATCH 2/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index c9f6d88f0b13a..cf4a5393df8f4 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -491,8 +491,6 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
[llvm-branch-commits] [libcxx] Add release note for #95264 (PR #96116)
ldionne wrote: > We don't have a great way to add release notes after the final release. I > added this to the release announcement do you think that is enough? Yes I think that is fine. https://github.com/llvm/llvm-project/pull/96116 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] Add release note for #95264 (PR #96116)
https://github.com/ldionne closed https://github.com/llvm/llvm-project/pull/96116 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95884 >From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:42:00 -0700 Subject: [PATCH 1/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 73 -- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 66cabc236f4b2..c9f6d88f0b13a 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { // Uses name similarity to match functions that were not matched by name. uint64_t MatchedWithDemangledName = 0; - if (opts::NameSimilarityFunctionMatchingThreshold > 0) { - -std::unordered_map NameToBinaryFunction; -NameToBinaryFunction.reserve(BC.getBinaryFunctions().size()); -for (auto &[_, BF] : BC.getBinaryFunctions()) { + if (opts::NameSimilarityFunctionMatchingThreshold > 0) { +auto DemangleName = [&](const char* String) { int Status = 0; - char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(), + char *DemangledName = abi::__cxa_demangle(String, nullptr, nullptr, ); - if (Status == 0) -NameToBinaryFunction[std::string(DemangledName)] = + return Status == 0 ? new std::string(DemangledName) : nullptr; +}; + +auto DeriveNameSpace = [&](std::string DemangledName) { + size_t LParen = std::string(DemangledName).find("("); + std::string FunctionName = std::string(DemangledName).substr(0, LParen); + size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::"); + return ScopeResolutionOperator == std::string::npos ? std::string("") : std::string(DemangledName).substr(0, ScopeResolutionOperator); +}; + +std::unordered_map> NamespaceToBFs; +NamespaceToBFs.reserve(BC.getBinaryFunctions().size()); + +for (BinaryFunction *BF : BC.getAllBinaryFunctions()) { + std::string* DemangledName = DemangleName(BF->getOneName().str().c_str()); + if (!DemangledName) +continue; + std::string Namespace = DeriveNameSpace(*DemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) +NamespaceToBFs[Namespace] = {BF}; + else +It->second.push_back(BF); } for (auto YamlBF : YamlBP.Functions) { if (YamlBF.Used) continue; - int Status = 0; - char *DemangledName = - abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, ); - if (Status != 0) + std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str()); + if (!YamlBFDemangledName) continue; - auto It = NameToBinaryFunction.find(DemangledName); - if (It == NameToBinaryFunction.end()) + std::string Namespace = DeriveNameSpace(*YamlBFDemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) continue; - BinaryFunction *BF = It->second; - matchProfileToFunction(YamlBF, *BF); - ++MatchedWithDemangledName; + std::vector BFs = It->second; + + unsigned MinEditDistance = UINT_MAX; + BinaryFunction *ClosestNameBF = nullptr; + + for (BinaryFunction *BF : BFs) { +if (ProfiledFunctions.count(BF)) + continue; +std::string *BFDemangledName = DemangleName(BF->getOneName().str().c_str()); +if (!BFDemangledName) + continue; +unsigned BFEditDistance = StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName); +if (BFEditDistance < MinEditDistance) { + MinEditDistance = BFEditDistance; + ClosestNameBF = BF; +} + } + + if (ClosestNameBF && +MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) { +matchProfileToFunction(YamlBF, *ClosestNameBF); +++MatchedWithDemangledName; + } } } + outs() << MatchedWithDemangledName << ": functions matched by name similarity\n"; + for (yaml::bolt::BinaryFunctionProfile : YamlBP.Functions) if (!YamlBF.Used && opts::Verbosity >= 1) errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name >From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:45:27 -0700 Subject: [PATCH 2/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index c9f6d88f0b13a..cf4a5393df8f4 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -491,8 +491,6 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
[llvm-branch-commits] [libcxx] Add release note for #95264 (PR #96116)
tstellar wrote: We don't have a great way to add release notes after the final release. I added this to the release announcement do you think that is enough? https://github.com/llvm/llvm-project/pull/96116 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/shawbyoung converted_to_draft https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][NFC] Move opts::Lite to CommandLineOpts.cpp (PR #96571)
https://github.com/shawbyoung closed https://github.com/llvm/llvm-project/pull/96571 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][NFC] Move opts::Lite to CommandLineOpts.cpp (PR #96571)
llvmbot wrote: @llvm/pr-subscribers-bolt Author: shaw young (shawbyoung) Changes Test Plan: n/a --- Full diff: https://github.com/llvm/llvm-project/pull/96571.diff 2 Files Affected: - (modified) bolt/lib/Rewrite/RewriteInstance.cpp (-3) - (modified) bolt/lib/Utils/CommandLineOpts.cpp (+3) ``diff diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 1a3a8af21d81b..42c65fcd3ed97 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -140,9 +140,6 @@ KeepTmp("keep-tmp", cl::Hidden, cl::cat(BoltCategory)); -cl::opt Lite("lite", cl::desc("skip processing of cold functions"), - cl::cat(BoltCategory)); - static cl::opt LiteThresholdPct("lite-threshold-pct", cl::desc("threshold (in percent) for selecting functions to process in lite " diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index 41c89bc8aeba4..b9bc79f408a6b 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -128,6 +128,9 @@ cl::opt cl::desc("instrument code to generate accurate profile data"), cl::cat(BoltOptCategory)); +cl::opt Lite("lite", cl::desc("skip processing of cold functions"), + cl::cat(BoltCategory)); + cl::opt OutputFilename("o", cl::desc(""), `` https://github.com/llvm/llvm-project/pull/96571 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][NFC] Move opts::Lite to CommandLineOpts.cpp (PR #96571)
https://github.com/shawbyoung created https://github.com/llvm/llvm-project/pull/96571 Test Plan: n/a ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95821 >From 92212c96ea169d26ac10bf8d750539bc5dd72c49 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 17 Jun 2024 15:39:02 -0700 Subject: [PATCH 01/20] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index f0fcb1c130002..2bca83c9d11ec 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -421,6 +421,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size()); for (auto& [_, BF] : BC.getBinaryFunctions()) { +if (!ProfiledFunctions.count()) + continue; StrictBinaryFunctionHashes[BF.getHash()] = } >From 2497922ccc46e3189870563b1fe819b67172778d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 17 Jun 2024 15:39:39 -0700 Subject: [PATCH 02/20] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 2bca83c9d11ec..56474a67307ed 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -417,10 +417,10 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { // Uses the strict hash of profiled and binary functions to match functions // that are not matched by name or common name. - std::unordered_map StrictBinaryFunctionHashes; + std::unordered_map StrictBinaryFunctionHashes; StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size()); - for (auto& [_, BF] : BC.getBinaryFunctions()) { + for (auto &[_, BF] : BC.getBinaryFunctions()) { if (!ProfiledFunctions.count()) continue; StrictBinaryFunctionHashes[BF.getHash()] = @@ -428,7 +428,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { for (auto YamlBF : YamlBP.Functions) { auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash); -if (It != StrictBinaryFunctionHashes.end() && !ProfiledFunctions.count(It->second)) { +if (It != StrictBinaryFunctionHashes.end() && +!ProfiledFunctions.count(It->second)) { auto *BF = It->second; matchProfileToFunction(YamlBF, *BF); } >From 8e7b2229a69c3795e723404c56e0d4298eef412a Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 17 Jun 2024 15:55:58 -0700 Subject: [PATCH 03/20] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 +- bolt/test/X86/profile-passthrough-block.test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 56474a67307ed..779d60bce3b66 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -421,7 +421,7 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size()); for (auto &[_, BF] : BC.getBinaryFunctions()) { -if (!ProfiledFunctions.count()) +if (ProfiledFunctions.count()) continue; StrictBinaryFunctionHashes[BF.getHash()] = } diff --git a/bolt/test/X86/profile-passthrough-block.test b/bolt/test/X86/profile-passthrough-block.test index 1b875885260dc..ed2a8117ddfc4 100644 --- a/bolt/test/X86/profile-passthrough-block.test +++ b/bolt/test/X86/profile-passthrough-block.test @@ -57,7 +57,7 @@ header: functions: - name:main fid: 0 -hash:0x +hash:0x0001 exec:1 nblocks: 6 blocks: >From ef5f0dac9185dbb7a62345938d4f309c3379a85d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 17 Jun 2024 15:58:22 -0700 Subject: [PATCH 04/20] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 779d60bce3b66..e3d30bfdb74e4 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -427,6 +427,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { } for (auto YamlBF : YamlBP.Functions) { +if (YamlBF.Used) + continue; auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash); if (It != StrictBinaryFunctionHashes.end() && !ProfiledFunctions.count(It->second)) { >From 41ce2897a445e47dfe685da66b4af080824e78ed Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 17 Jun 2024 16:00:27 -0700 Subject: [PATCH 05/20] spr amend Created using spr 1.3.4 --- bolt/test/X86/profile-passthrough-block.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95821 >From 92212c96ea169d26ac10bf8d750539bc5dd72c49 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 17 Jun 2024 15:39:02 -0700 Subject: [PATCH 01/20] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index f0fcb1c130002..2bca83c9d11ec 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -421,6 +421,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size()); for (auto& [_, BF] : BC.getBinaryFunctions()) { +if (!ProfiledFunctions.count()) + continue; StrictBinaryFunctionHashes[BF.getHash()] = } >From 2497922ccc46e3189870563b1fe819b67172778d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 17 Jun 2024 15:39:39 -0700 Subject: [PATCH 02/20] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 2bca83c9d11ec..56474a67307ed 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -417,10 +417,10 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { // Uses the strict hash of profiled and binary functions to match functions // that are not matched by name or common name. - std::unordered_map StrictBinaryFunctionHashes; + std::unordered_map StrictBinaryFunctionHashes; StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size()); - for (auto& [_, BF] : BC.getBinaryFunctions()) { + for (auto &[_, BF] : BC.getBinaryFunctions()) { if (!ProfiledFunctions.count()) continue; StrictBinaryFunctionHashes[BF.getHash()] = @@ -428,7 +428,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { for (auto YamlBF : YamlBP.Functions) { auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash); -if (It != StrictBinaryFunctionHashes.end() && !ProfiledFunctions.count(It->second)) { +if (It != StrictBinaryFunctionHashes.end() && +!ProfiledFunctions.count(It->second)) { auto *BF = It->second; matchProfileToFunction(YamlBF, *BF); } >From 8e7b2229a69c3795e723404c56e0d4298eef412a Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 17 Jun 2024 15:55:58 -0700 Subject: [PATCH 03/20] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 +- bolt/test/X86/profile-passthrough-block.test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 56474a67307ed..779d60bce3b66 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -421,7 +421,7 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size()); for (auto &[_, BF] : BC.getBinaryFunctions()) { -if (!ProfiledFunctions.count()) +if (ProfiledFunctions.count()) continue; StrictBinaryFunctionHashes[BF.getHash()] = } diff --git a/bolt/test/X86/profile-passthrough-block.test b/bolt/test/X86/profile-passthrough-block.test index 1b875885260dc..ed2a8117ddfc4 100644 --- a/bolt/test/X86/profile-passthrough-block.test +++ b/bolt/test/X86/profile-passthrough-block.test @@ -57,7 +57,7 @@ header: functions: - name:main fid: 0 -hash:0x +hash:0x0001 exec:1 nblocks: 6 blocks: >From ef5f0dac9185dbb7a62345938d4f309c3379a85d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 17 Jun 2024 15:58:22 -0700 Subject: [PATCH 04/20] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 779d60bce3b66..e3d30bfdb74e4 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -427,6 +427,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { } for (auto YamlBF : YamlBP.Functions) { +if (YamlBF.Used) + continue; auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash); if (It != StrictBinaryFunctionHashes.end() && !ProfiledFunctions.count(It->second)) { >From 41ce2897a445e47dfe685da66b4af080824e78ed Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 17 Jun 2024 16:00:27 -0700 Subject: [PATCH 05/20] spr amend Created using spr 1.3.4 --- bolt/test/X86/profile-passthrough-block.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/rampitec approved this pull request. https://github.com/llvm/llvm-project/pull/96444 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From baaf96125e8f913a161f1c13216618a3de128182 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++--- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 0ec65f759bc35..9aaeaf73287d5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFaddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasAtomicMemoryAtomicFaddF32DenormalSupport", @@ -1388,7 +1395,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< @@ -1433,7 +1441,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFaddF32DenormalSupport + FeatureMemoryAtomicFaddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1928,11 +1937,9 @@ def isGFX12Plus : def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; - -def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd - Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">, - // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction. - AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>; +def HasFlatBufferGlobalAtomicFaddF64Inst : + Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, + AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; def HasAtomicFMinFMaxF32GlobalInsts : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3b8d94b744000..a904c8483dbf5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst +let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 // depending on some subtargets. defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { let SubtargetPredicate = isGFX940Plus; @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>; } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 98054dde398b3..89946a4719557 100644 ---
[llvm-branch-commits] Reapply "[llvm][RISCV] Enable trailing fences for seq-cst stores by default (#87376)" (PR #90267)
https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/90267 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] Reapply "[llvm][RISCV] Enable trailing fences for seq-cst stores by default (#87376)" (PR #90267)
https://github.com/ilovepi edited https://github.com/llvm/llvm-project/pull/90267 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] Reapply "[llvm][RISCV] Enable trailing fences for seq-cst stores by default (#87376)" (PR #90267)
https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/90267 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] Add release note for #95264 (PR #96116)
ldionne wrote: Gentle ping @tstellar, do we want to merge this? The CI issues are unrelated. https://github.com/llvm/llvm-project/pull/96116 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for global atomic fadd denormal support (PR #96443)
https://github.com/rampitec approved this pull request. https://github.com/llvm/llvm-project/pull/96443 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] 0f6f6dd - Revert "[Flang][Driver] Add -print-resource-dir command line flag to emit Fla…"
Author: David Truby Date: 2024-06-24T21:53:39+01:00 New Revision: 0f6f6ddbc0d84d2df23df8c8a771ace3c0dca988 URL: https://github.com/llvm/llvm-project/commit/0f6f6ddbc0d84d2df23df8c8a771ace3c0dca988 DIFF: https://github.com/llvm/llvm-project/commit/0f6f6ddbc0d84d2df23df8c8a771ace3c0dca988.diff LOG: Revert "[Flang][Driver] Add -print-resource-dir command line flag to emit Fla…" This reverts commit 2df06e42d733a1f7a1cdf715894921a5bbbc2956. Added: Modified: clang/include/clang/Driver/Driver.h clang/include/clang/Driver/Options.td clang/lib/Driver/Driver.cpp Removed: flang/test/Driver/print-resource-dir.F90 diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 084c3ffe69ae8..cc1538372d5f8 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -747,9 +747,6 @@ class Driver { /// option. void setDriverMode(StringRef DriverModeValue); - /// Set the resource directory, depending on which driver is being used. - void setResourceDirectory(); - /// Parse the \p Args list for LTO options and record the type of LTO /// compilation based on which -f(no-)?lto(=.*)? option occurs last. void setLTOMode(const llvm::opt::ArgList ); diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 6416261077ed1..dfdf0741e28eb 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5535,10 +5535,7 @@ def print_prog_name_EQ : Joined<["-", "--"], "print-prog-name=">, Visibility<[ClangOption, CLOption]>; def print_resource_dir : Flag<["-", "--"], "print-resource-dir">, HelpText<"Print the resource directory pathname">, - HelpTextForVariants<[FlangOption], - "Print the resource directory pathname that contains lib and " - "include directories with the runtime libraries and MODULE files.">, - Visibility<[ClangOption, CLOption, FlangOption]>; + Visibility<[ClangOption, CLOption]>; def print_search_dirs : Flag<["-", "--"], "print-search-dirs">, HelpText<"Print the paths used for finding libraries and programs">, Visibility<[ClangOption, CLOption]>; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 0298d22203d9d..33ab7cc3f3968 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -229,6 +229,9 @@ Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple, UserConfigDir = static_cast(P); } #endif + + // Compute the path to the resource directory. + ResourceDir = GetResourcesPath(ClangExecutable, CLANG_RESOURCE_DIR); } void Driver::setDriverMode(StringRef Value) { @@ -247,24 +250,6 @@ void Driver::setDriverMode(StringRef Value) { Diag(diag::err_drv_unsupported_option_argument) << OptName << Value; } -void Driver::setResourceDirectory() { - // Compute the path to the resource directory, depending on the driver mode. - switch (Mode) { - case GCCMode: - case GXXMode: - case CPPMode: - case CLMode: - case DXCMode: -ResourceDir = GetResourcesPath(ClangExecutable, CLANG_RESOURCE_DIR); -break; - case FlangMode: -SmallString<64> customResourcePathRelativeToDriver{".."}; -ResourceDir = -GetResourcesPath(ClangExecutable, customResourcePathRelativeToDriver); -break; - } -} - InputArgList Driver::ParseArgStrings(ArrayRef ArgStrings, bool UseDriverMode, bool ) { llvm::PrettyStackTraceString CrashInfo("Command line argument parsing"); @@ -1217,7 +1202,6 @@ Compilation *Driver::BuildCompilation(ArrayRef ArgList) { if (!DriverMode.empty()) setDriverMode(DriverMode); - setResourceDirectory(); // FIXME: What are we going to do with -V and -b? // Arguments specified in command line. diff --git a/flang/test/Driver/print-resource-dir.F90 b/flang/test/Driver/print-resource-dir.F90 deleted file mode 100644 index 8fd35f1800df2..0 --- a/flang/test/Driver/print-resource-dir.F90 +++ /dev/null @@ -1,4 +0,0 @@ -! DEFINE: %{resource_dir} = %S/Inputs/resource_dir -! RUN: %flang -print-resource-dir -resource-dir=%{resource_dir}.. \ -! RUN: | FileCheck -check-prefix=PRINT-RESOURCE-DIR -DFILE=%{resource_dir} %s -! PRINT-RESOURCE-DIR: [[FILE]] ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for global atomic fadd denormal support (PR #96443)
arsenm wrote: > It is worse than that. It behaves differently depending on where atomic is > executed. There is no single answer if this instruction supports denorms or > not. That doesn't matter. The flat case that sometimes flushes is just a no. Flushing is never a guarantee, we only need to know a flush may happen https://github.com/llvm/llvm-project/pull/96443 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95821 >From 92212c96ea169d26ac10bf8d750539bc5dd72c49 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 17 Jun 2024 15:39:02 -0700 Subject: [PATCH 01/20] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index f0fcb1c130002..2bca83c9d11ec 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -421,6 +421,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size()); for (auto& [_, BF] : BC.getBinaryFunctions()) { +if (!ProfiledFunctions.count()) + continue; StrictBinaryFunctionHashes[BF.getHash()] = } >From 2497922ccc46e3189870563b1fe819b67172778d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 17 Jun 2024 15:39:39 -0700 Subject: [PATCH 02/20] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 2bca83c9d11ec..56474a67307ed 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -417,10 +417,10 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { // Uses the strict hash of profiled and binary functions to match functions // that are not matched by name or common name. - std::unordered_map StrictBinaryFunctionHashes; + std::unordered_map StrictBinaryFunctionHashes; StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size()); - for (auto& [_, BF] : BC.getBinaryFunctions()) { + for (auto &[_, BF] : BC.getBinaryFunctions()) { if (!ProfiledFunctions.count()) continue; StrictBinaryFunctionHashes[BF.getHash()] = @@ -428,7 +428,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { for (auto YamlBF : YamlBP.Functions) { auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash); -if (It != StrictBinaryFunctionHashes.end() && !ProfiledFunctions.count(It->second)) { +if (It != StrictBinaryFunctionHashes.end() && +!ProfiledFunctions.count(It->second)) { auto *BF = It->second; matchProfileToFunction(YamlBF, *BF); } >From 8e7b2229a69c3795e723404c56e0d4298eef412a Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 17 Jun 2024 15:55:58 -0700 Subject: [PATCH 03/20] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 +- bolt/test/X86/profile-passthrough-block.test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 56474a67307ed..779d60bce3b66 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -421,7 +421,7 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size()); for (auto &[_, BF] : BC.getBinaryFunctions()) { -if (!ProfiledFunctions.count()) +if (ProfiledFunctions.count()) continue; StrictBinaryFunctionHashes[BF.getHash()] = } diff --git a/bolt/test/X86/profile-passthrough-block.test b/bolt/test/X86/profile-passthrough-block.test index 1b875885260dc..ed2a8117ddfc4 100644 --- a/bolt/test/X86/profile-passthrough-block.test +++ b/bolt/test/X86/profile-passthrough-block.test @@ -57,7 +57,7 @@ header: functions: - name:main fid: 0 -hash:0x +hash:0x0001 exec:1 nblocks: 6 blocks: >From ef5f0dac9185dbb7a62345938d4f309c3379a85d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 17 Jun 2024 15:58:22 -0700 Subject: [PATCH 04/20] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 779d60bce3b66..e3d30bfdb74e4 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -427,6 +427,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { } for (auto YamlBF : YamlBP.Functions) { +if (YamlBF.Used) + continue; auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash); if (It != StrictBinaryFunctionHashes.end() && !ProfiledFunctions.count(It->second)) { >From 41ce2897a445e47dfe685da66b4af080824e78ed Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 17 Jun 2024 16:00:27 -0700 Subject: [PATCH 05/20] spr amend Created using spr 1.3.4 --- bolt/test/X86/profile-passthrough-block.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
[llvm-branch-commits] [mlir] [mlir][Transforms][NFC] Dialect Conversion: Move argument materialization logic (PR #96329)
https://github.com/ftynse approved this pull request. https://github.com/llvm/llvm-project/pull/96329 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [sanitizer] Rename DEFINE_REAL_PTHREAD_FUNCTIONS (PR #96527)
https://github.com/fmayer approved this pull request. https://github.com/llvm/llvm-project/pull/96527 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
@@ -788,6 +788,14 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureAgentScopeFineGrainedRemoteMemoryAtomics + : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics", + "HasAgentScopeFineGrainedRemoteMemoryAtomics", + "true", + "Agent (device) scoped atomic operations not directly supported by " yxsamliu wrote: I feel the description is a little bit confusing, at least for me. how about "Agent (device) scoped atomic operations, excluding those directly supported by PCIe (i.e., integer atomic add, exchange, and compare-and-swap), are functional for allocations in host or peer PCIe device memory." https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [sanitizer] Rename DEFINE_REAL_PTHREAD_FUNCTIONS (PR #96527)
https://github.com/vitalybuka edited https://github.com/llvm/llvm-project/pull/96527 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [sanitizer] Rename DEFINE_REAL_PTHREAD_FUNCTIONS (PR #96527)
llvmbot wrote: @llvm/pr-subscribers-pgo Author: Vitaly Buka (vitalybuka) Changes We use REAL() calls in interceptors, but DEFINE_REAL_PTHREAD_FUNCTIONS has nothing to do with them and only used for internal maintenance threads. --- Full diff: https://github.com/llvm/llvm-project/pull/96527.diff 9 Files Affected: - (modified) compiler-rt/lib/asan/asan_interceptors.cpp (+1-1) - (modified) compiler-rt/lib/hwasan/hwasan_interceptors.cpp (+1-1) - (modified) compiler-rt/lib/lsan/lsan_interceptors.cpp (+1-1) - (modified) compiler-rt/lib/memprof/memprof_interceptors.cpp (+1-1) - (modified) compiler-rt/lib/msan/msan_interceptors.cpp (+1-1) - (modified) compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp (+2-2) - (modified) compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp (+4-4) - (modified) compiler-rt/lib/sanitizer_common/sanitizer_posix.h (+14-14) - (modified) compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp (+4-4) ``diff diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp index 6d1360e104975..f8f86a766b204 100644 --- a/compiler-rt/lib/asan/asan_interceptors.cpp +++ b/compiler-rt/lib/asan/asan_interceptors.cpp @@ -333,7 +333,7 @@ INTERCEPTOR(int, pthread_timedjoin_np, void *thread, void **ret, } #endif -DEFINE_REAL_PTHREAD_FUNCTIONS +DEFINE_INTERNAL_PTHREAD_FUNCTIONS #endif // ASAN_INTERCEPT_PTHREAD_CREATE #if ASAN_INTERCEPT_SWAPCONTEXT diff --git a/compiler-rt/lib/hwasan/hwasan_interceptors.cpp b/compiler-rt/lib/hwasan/hwasan_interceptors.cpp index 08ae435b8214a..c10b5c158548e 100644 --- a/compiler-rt/lib/hwasan/hwasan_interceptors.cpp +++ b/compiler-rt/lib/hwasan/hwasan_interceptors.cpp @@ -334,7 +334,7 @@ INTERCEPTOR(int, pthread_timedjoin_np, void *thread, void **ret, } #endif -DEFINE_REAL_PTHREAD_FUNCTIONS +DEFINE_INTERNAL_PTHREAD_FUNCTIONS DEFINE_REAL(int, vfork,) DECLARE_EXTERN_INTERCEPTOR_AND_WRAPPER(int, vfork,) diff --git a/compiler-rt/lib/lsan/lsan_interceptors.cpp b/compiler-rt/lib/lsan/lsan_interceptors.cpp index 1fd0010f9ea93..6df4b6865b379 100644 --- a/compiler-rt/lib/lsan/lsan_interceptors.cpp +++ b/compiler-rt/lib/lsan/lsan_interceptors.cpp @@ -525,7 +525,7 @@ INTERCEPTOR(int, pthread_timedjoin_np, void *thread, void **ret, #define LSAN_MAYBE_INTERCEPT_TIMEDJOIN # endif // SANITIZER_INTERCEPT_TIMEDJOIN -DEFINE_REAL_PTHREAD_FUNCTIONS +DEFINE_INTERNAL_PTHREAD_FUNCTIONS INTERCEPTOR(void, _exit, int status) { if (status == 0 && HasReportedLeaks()) status = common_flags()->exitcode; diff --git a/compiler-rt/lib/memprof/memprof_interceptors.cpp b/compiler-rt/lib/memprof/memprof_interceptors.cpp index a267f6d3d6717..53ee4e953419b 100644 --- a/compiler-rt/lib/memprof/memprof_interceptors.cpp +++ b/compiler-rt/lib/memprof/memprof_interceptors.cpp @@ -166,7 +166,7 @@ INTERCEPTOR(int, pthread_join, void *t, void **arg) { return REAL(pthread_join)(t, arg); } -DEFINE_REAL_PTHREAD_FUNCTIONS +DEFINE_INTERNAL_PTHREAD_FUNCTIONS INTERCEPTOR(char *, index, const char *string, int c) ALIAS(WRAP(strchr)); diff --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp index 9abf240633258..789b739b41189 100644 --- a/compiler-rt/lib/msan/msan_interceptors.cpp +++ b/compiler-rt/lib/msan/msan_interceptors.cpp @@ -1226,7 +1226,7 @@ INTERCEPTOR(int, pthread_timedjoin_np, void *thread, void **retval, } #endif -DEFINE_REAL_PTHREAD_FUNCTIONS +DEFINE_INTERNAL_PTHREAD_FUNCTIONS extern char *tzname[2]; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp index 7b74bb1a7e0f3..a174ae7be991d 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp @@ -87,8 +87,8 @@ void MaybeStartBackgroudThread() { if (!common_flags()->hard_rss_limit_mb && !common_flags()->soft_rss_limit_mb && !common_flags()->heap_profile) return; - if (!_pthread_create) { -VPrintf(1, "%s: real_pthread_create undefined\n", SanitizerToolName); + if (!_pthread_create) { +VPrintf(1, "%s: internal_pthread_create undefined\n", SanitizerToolName); return; // Can't spawn the thread anyway. } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 5d2dd3a7a658f..b590a9e7c3fc6 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -1845,18 +1845,18 @@ HandleSignalMode GetHandleSignalMode(int signum) { # if !SANITIZER_GO void *internal_start_thread(void *(*func)(void *arg), void *arg) { - if (_pthread_create == 0) + if (_pthread_create == 0) return nullptr; // Start the thread with signals blocked, otherwise it can steal user signals. ScopedBlockSignals block(nullptr); void *th; -
[llvm-branch-commits] [sanitizer] Rename DEFINE_REAL_PTHREAD_FUNCTIONS (PR #96527)
https://github.com/vitalybuka created https://github.com/llvm/llvm-project/pull/96527 We use REAL() calls in interceptors, but DEFINE_REAL_PTHREAD_FUNCTIONS has nothing to do with them and only used for internal maintenance threads. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang][misexpect] Add support to clang for profitable annotation diagnostics (PR #96525)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-clang-driver Author: Paul Kirth (ilovepi) Changes Add basic plumbing to clang so that diagnostics can be surfaced to users. --- Full diff: https://github.com/llvm/llvm-project/pull/96525.diff 9 Files Affected: - (modified) clang/include/clang/Basic/CodeGenOptions.def (+1) - (modified) clang/include/clang/Driver/Options.td (+4) - (modified) clang/lib/CodeGen/BackendUtil.cpp (+1) - (modified) clang/lib/CodeGen/CodeGenAction.cpp (+4) - (modified) clang/lib/Driver/ToolChains/Clang.cpp (+7) - (modified) clang/lib/Frontend/CompilerInvocation.cpp (+3) - (added) clang/test/Profile/Inputs/missing-annotation.proftext (+18) - (added) clang/test/Profile/missing-annotation.c (+35) - (modified) llvm/lib/Transforms/Utils/MisExpect.cpp (+4-4) ``diff diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index e3f6da4a84f69..fab91cd8a76b5 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -181,6 +181,7 @@ CODEGENOPT(FatalWarnings , 1, 0) ///< Set when -Wa,--fatal-warnings is CODEGENOPT(NoWarn, 1, 0) ///< Set when -Wa,--no-warn is enabled. CODEGENOPT(NoTypeCheck , 1, 0) ///< Set when -Wa,--no-type-check is enabled. CODEGENOPT(MisExpect , 1, 0) ///< Set when -Wmisexpect is enabled +CODEGENOPT(MissingAnnotations, 1, 0) ///< Set when suggesting missing perf annotations CODEGENOPT(EnableSegmentedStacks , 1, 0) ///< Set when -fsplit-stack is enabled. CODEGENOPT(StackClashProtector, 1, 0) ///< Set when -fstack-clash-protection is enabled. CODEGENOPT(NoImplicitFloat , 1, 0) ///< Set when -mno-implicit-float is enabled. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c529cc9506667..6dfc5bb437034 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2079,6 +2079,10 @@ def fdiagnostics_misexpect_tolerance_EQ : Joined<["-"], "fdiagnostics-misexpect- Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, HelpText<"Prevent misexpect diagnostics from being output if the profile counts are within N% of the expected. ">; +defm diagnostics_missing_annotations : BoolFOption<"diagnostics-missing-annotations", +CodeGenOpts<"MissingAnnotations">, DefaultFalse, +PosFlag, +NegFlag>; defm diagnostics_show_option : BoolFOption<"diagnostics-show-option", DiagnosticOpts<"ShowOptionNames">, DefaultTrue, NegFlag, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index b09680086248d..2bcc23a6a9655 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -486,6 +486,7 @@ static bool initTargetOptions(DiagnosticsEngine , Options.MCOptions.PPCUseFullRegisterNames = CodeGenOpts.PPCUseFullRegisterNames; Options.MisExpect = CodeGenOpts.MisExpect; + Options.MissingAnnotations = CodeGenOpts.MissingAnnotations; return true; } diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 6d3efdb5ffe34..efebe28e5ebc4 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -326,6 +326,10 @@ void BackendConsumer::HandleTranslationUnit(ASTContext ) { CodeGenOpts.DiagnosticsMisExpectTolerance); } + if (CodeGenOpts.MissingAnnotations) { +Ctx.setAnnotationDiagsRequested(true); + } + // Link each LinkModule into our module. if (!CodeGenOpts.LinkBitcodePostopt && LinkInModules(getModule())) return; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 2ce9e2f4bcfcd..9704779e00258 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4243,6 +4243,13 @@ static void RenderDiagnosticsOptions(const Driver , const ArgList , CmdArgs.push_back(Args.MakeArgString(Opt)); } + if (const Arg *A = + Args.getLastArg(options::OPT_fdiagnostics_missing_annotations, + options::OPT_fno_diagnostics_missing_annotations)) { +if (A->getOption().matches(options::OPT_fdiagnostics_missing_annotations)) + CmdArgs.push_back("-fdiagnostics-missing-annotations"); + } + if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_format_EQ)) { CmdArgs.push_back("-fdiagnostics-format"); CmdArgs.push_back(A->getValue()); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index a6d9f42ace9cc..f9d34cd011f2a 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -4775,6 +4775,9 @@ bool CompilerInvocation::CreateFromArgsImpl( } } + if(Args.hasArg(OPT_fdiagnostics_missing_annotations)) +Res.getCodeGenOpts().MissingAnnotations = true; + if (LangOpts.CUDA) {
[llvm-branch-commits] [misexpect] Support diagnostics from frontend profile data (PR #96524)
llvmbot wrote: @llvm/pr-subscribers-pgo @llvm/pr-subscribers-llvm-transforms Author: Paul Kirth (ilovepi) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/96524.diff 3 Files Affected: - (modified) llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp (+18) - (modified) llvm/lib/Transforms/Utils/MisExpect.cpp (+1-2) - (modified) llvm/test/Transforms/PGOProfile/missing-annotation.ll (+1-1) ``diff diff --git a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp index 17c5a4ee1fd0b..4075749d0d574 100644 --- a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -369,9 +369,21 @@ static bool lowerExpectIntrinsic(Function ) { if (BranchInst *BI = dyn_cast(BB.getTerminator())) { if (handleBranchExpect(*BI)) ExpectIntrinsicsHandled++; + else { +SmallVector Weights; +if (extractBranchWeights(*BI, Weights)) + misexpect::checkMissingAnnotations(*BI, Weights, + /*IsFrontendInstr=*/false); + } } else if (SwitchInst *SI = dyn_cast(BB.getTerminator())) { if (handleSwitchExpect(*SI)) ExpectIntrinsicsHandled++; + else { +SmallVector Weights; +if (extractBranchWeights(*SI, Weights)) + misexpect::checkMissingAnnotations(*SI, Weights, + /*isFrontend=*/false); + } } // Remove llvm.expect intrinsics. Iterate backwards in order @@ -383,6 +395,12 @@ static bool lowerExpectIntrinsic(Function ) { if (SelectInst *SI = dyn_cast()) { if (handleBrSelExpect(*SI)) ExpectIntrinsicsHandled++; + else { +SmallVector Weights; +if (extractBranchWeights(*SI, Weights)) + misexpect::checkMissingAnnotations(*SI, Weights, + /*isFrontend=*/false); + } } continue; } diff --git a/llvm/lib/Transforms/Utils/MisExpect.cpp b/llvm/lib/Transforms/Utils/MisExpect.cpp index 933d9a146533d..1d88f867971e8 100644 --- a/llvm/lib/Transforms/Utils/MisExpect.cpp +++ b/llvm/lib/Transforms/Utils/MisExpect.cpp @@ -302,8 +302,7 @@ void checkMissingAnnotations(Instruction , return; if (IsFrontendInstr) { -// TODO: Frontend checking will have to be thought through, since we need -// to do the check on branches that don't have expect intrinsics +verifyMissingAnnotations(I, ExistingWeights); } else { SmallVector ExpectedWeights; if (extractBranchWeights(I, ExpectedWeights)) diff --git a/llvm/test/Transforms/PGOProfile/missing-annotation.ll b/llvm/test/Transforms/PGOProfile/missing-annotation.ll index 6b52302449900..03b0b3bb5cc54 100644 --- a/llvm/test/Transforms/PGOProfile/missing-annotation.ll +++ b/llvm/test/Transforms/PGOProfile/missing-annotation.ll @@ -3,7 +3,7 @@ ; RUN: llvm-profdata merge %S/Inputs/misexpect-branch-correct.proftext -o %t.profdata -; RUN: opt < %s -passes="function(lower-expect),pgo-instr-use" -pgo-test-profile-file=%t.profdata -pgo-missing-annotations -pass-remarks=missing-annotation -S 2>&1 | FileCheck %s --check-prefix=MISSING_ANNOTATION +; RUN: opt < %s -passes="function(lower-expect),pgo-instr-use" -pgo-test-profile-file=%t.profdata -pgo-missing-annotations -pass-remarks=missing-annotations -S 2>&1 | FileCheck %s --check-prefix=MISSING_ANNOTATION ; MISSING_ANNOTATION: remark: misexpect-branch.c:22:0: Extremely hot condition. Consider adding llvm.expect intrinsic `` https://github.com/llvm/llvm-project/pull/96524 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang][misexpect] Add support to clang for profitable annotation diagnostics (PR #96525)
https://github.com/ilovepi created https://github.com/llvm/llvm-project/pull/96525 Add basic plumbing to clang so that diagnostics can be surfaced to users. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm][misexpect] Enable diagnostics for profitable llvm.expect annotations (PR #96523)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-pgo Author: Paul Kirth (ilovepi) Changes Issue #56502 describes an enhancement related to the use of llvm.expect. The request is for a diagnostic mode that can identify branches that would benefit from the use of llvm.expect based on the branch_weights assigned from a PGO or sample profile. To support identify branches(or switches) that would benefit from the use of an llvm.expect intrinsic, we follow a similar checking pattern to that used in MisExpect, but only in cases where MisExpect diagnostics would not be used (i.e., when an llvm.expect intrinsic has already been used). --- Full diff: https://github.com/llvm/llvm-project/pull/96523.diff 8 Files Affected: - (modified) llvm/include/llvm/IR/LLVMContext.h (+2) - (modified) llvm/include/llvm/Target/TargetOptions.h (+4) - (modified) llvm/include/llvm/Transforms/Utils/MisExpect.h (+3) - (modified) llvm/lib/IR/LLVMContext.cpp (+6) - (modified) llvm/lib/IR/LLVMContextImpl.h (+4) - (modified) llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp (+1) - (modified) llvm/lib/Transforms/Utils/MisExpect.cpp (+137-36) - (added) llvm/test/Transforms/PGOProfile/missing-annotation.ll (+110) ``diff diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h index 89ad6f1572c67..94e526d465c8e 100644 --- a/llvm/include/llvm/IR/LLVMContext.h +++ b/llvm/include/llvm/IR/LLVMContext.h @@ -212,6 +212,8 @@ class LLVMContext { void setMisExpectWarningRequested(bool Requested); void setDiagnosticsMisExpectTolerance(std::optional Tolerance); uint32_t getDiagnosticsMisExpectTolerance() const; + bool getAnnotationDiagsRequested() const; + void setAnnotationDiagsRequested(bool Requested); /// Return the minimum hotness value a diagnostic would need in order /// to be included in optimization diagnostics. diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index d3464b5202ff3..447b86f69cfaa 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -377,6 +377,10 @@ namespace llvm { /// By default, it is set to false unsigned MisExpect : 1; +/// When set to true, enable MissingAnnotations diagnostics +/// By default, it is set to false +unsigned MissingAnnotations : 1; + /// When set to true, const objects with relocatable address values are put /// into the RO data section. unsigned XCOFFReadOnlyPointers : 1; diff --git a/llvm/include/llvm/Transforms/Utils/MisExpect.h b/llvm/include/llvm/Transforms/Utils/MisExpect.h index e9fba47c97a4d..118d859f93dd6 100644 --- a/llvm/include/llvm/Transforms/Utils/MisExpect.h +++ b/llvm/include/llvm/Transforms/Utils/MisExpect.h @@ -76,6 +76,9 @@ void checkExpectAnnotations(Instruction , const ArrayRef ExistingWeights, bool IsFrontend); +void checkMissingAnnotations(Instruction , + const ArrayRef ExistingWeights, + bool IsFrontendInstr); } // namespace misexpect } // namespace llvm diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp index 8120cccace40b..e1a05c1347969 100644 --- a/llvm/lib/IR/LLVMContext.cpp +++ b/llvm/lib/IR/LLVMContext.cpp @@ -171,6 +171,12 @@ void LLVMContext::setDiagnosticsMisExpectTolerance( uint32_t LLVMContext::getDiagnosticsMisExpectTolerance() const { return pImpl->DiagnosticsMisExpectTolerance.value_or(0); } +void LLVMContext::setAnnotationDiagsRequested(bool Requested) { + pImpl->AnnotationsDiagsRequested = Requested; +} +bool LLVMContext::getAnnotationDiagsRequested() const { + return pImpl->AnnotationsDiagsRequested; +} bool LLVMContext::isDiagnosticsHotnessThresholdSetFromPSI() const { return !pImpl->DiagnosticsHotnessThreshold.has_value(); diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index 5f8df87149f04..59eb5eb29705d 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1495,6 +1495,10 @@ class LLVMContextImpl { std::optional DiagnosticsMisExpectTolerance = 0; bool MisExpectWarningRequested = false; + /// Enables Diagnostics for Missing llvm.expect annotations on extremely hot + /// branches + bool AnnotationsDiagsRequested = false; + /// The specialized remark streamer used by LLVM's OptimizationRemarkEmitter. std::unique_ptr LLVMRS; diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 572d37a2b3e55..0fbf60194696a 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -2261,6 +2261,7 @@ void llvm::setProfMetadata(Module *M, Instruction *TI, } dbgs() << "\n";); misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false); +
[llvm-branch-commits] [misexpect] Support diagnostics from frontend profile data (PR #96524)
https://github.com/ilovepi created https://github.com/llvm/llvm-project/pull/96524 None ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm][misexpect] Enable diagnostics for profitable llvm.expect annotations (PR #96523)
https://github.com/ilovepi created https://github.com/llvm/llvm-project/pull/96523 Issue #56502 describes an enhancement related to the use of llvm.expect. The request is for a diagnostic mode that can identify branches that would benefit from the use of llvm.expect based on the branch_weights assigned from a PGO or sample profile. To support identify branches(or switches) that would benefit from the use of an llvm.expect intrinsic, we follow a similar checking pattern to that used in MisExpect, but only in cases where MisExpect diagnostics would not be used (i.e., when an llvm.expect intrinsic has already been used). ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
arsenm wrote: > need some tests This does nothing as it is. The real use patches have thousands of tests https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for global atomic fadd denormal support (PR #96443)
rampitec wrote: It is worse than that. It behaves differently depending on where atomic is executed. There is no single answer if this instruction supports denorms or not. https://github.com/llvm/llvm-project/pull/96443 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
rampitec wrote: Use it in a predicate when defining pseudos? https://github.com/llvm/llvm-project/pull/96444 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
yxsamliu wrote: need some tests https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
arsenm wrote: No, the string description mentions this is for the non-PCIe supported cases https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm][ProfDataUtils] provide getNumBranchWeights API (PR #90146)
https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/90146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm][ProfDataUtils] provide getNumBranchWeights API (PR #90146)
https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/90146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm][ProfDataUtils] provide getNumBranchWeights API (PR #90146)
https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/90146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm][ProfDataUtils] provide getNumBranchWeights API (PR #90146)
https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/90146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm][misexpect] Update MisExpect to use provenance tracking metadata (PR #86610)
https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/86610 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm][misexpect] Update MisExpect to use provenance tracking metadata (PR #86610)
https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/86610 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
yxsamliu wrote: If a sub target does not have this feature, does none of the atomic instructions work for fine-grained remote memory, including integer atomic add/xchg/cmpxchg? https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [TySan] Improved compatability for tests (PR #96507)
https://github.com/gbMattN edited https://github.com/llvm/llvm-project/pull/96507 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [TySan] Improved compatability for tests (PR #96507)
llvmbot wrote: @llvm/pr-subscribers-compiler-rt-sanitizer Author: None (gbMattN) Changes All the violation tests failed when running on my machine. By changing some check lines to regular expressions, we can account for file locality, and hardware specific type differences. There was also an include needed for better integer types. --- Full diff: https://github.com/llvm/llvm-project/pull/96507.diff 6 Files Affected: - (modified) compiler-rt/test/tysan/violation-pr45282.c (+1-1) - (modified) compiler-rt/test/tysan/violation-pr47137.c (+3-2) - (modified) compiler-rt/test/tysan/violation-pr62544.c (+1-1) - (modified) compiler-rt/test/tysan/violation-pr62828.cpp (+1-1) - (modified) compiler-rt/test/tysan/violation-pr68655.cpp (+1-1) - (modified) compiler-rt/test/tysan/violation-pr86685.c (+1-1) ``diff diff --git a/compiler-rt/test/tysan/violation-pr45282.c b/compiler-rt/test/tysan/violation-pr45282.c index 2cbc37b3d1832..37e710ae18deb 100644 --- a/compiler-rt/test/tysan/violation-pr45282.c +++ b/compiler-rt/test/tysan/violation-pr45282.c @@ -18,7 +18,7 @@ int main(void) { // CHECK: TypeSanitizer: type-aliasing-violation on address // CHECK-NEXT: WRITE of size 8 at {{.+}} with type double accesses an existing object of type float -// CHECK-NEXT: in main violation-pr45282.c:25 +// CHECK-NEXT: in main {{.*violation-pr45282.c:25.*}} // loop of problems for (j = 2; j <= 4; ++j) { diff --git a/compiler-rt/test/tysan/violation-pr47137.c b/compiler-rt/test/tysan/violation-pr47137.c index 04d68d1dd936e..7b996d39f81e2 100644 --- a/compiler-rt/test/tysan/violation-pr47137.c +++ b/compiler-rt/test/tysan/violation-pr47137.c @@ -4,6 +4,7 @@ // https://github.com/llvm/llvm-project/issues/47137 #include #include +#include void f(int m) { int n = (4 * m + 2) / 3; @@ -23,8 +24,8 @@ void f(int m) { } // CHECK: TypeSanitizer: type-aliasing-violation on address -// CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing object of type long long -// CHECK-NEXT:in f violation-pr47137.c:30 +// CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing object of type {{(long)+}} +// CHECK-NEXT:in f {{.*violation-pr47137.c:31.*}} for (int i = 0, j = 0; j < 4 * m; i += 4, j += 3) { for (int k = 0; k < 3; k++) { ((uint16_t *)a)[j + k] = ((uint16_t *)a)[i + k]; diff --git a/compiler-rt/test/tysan/violation-pr62544.c b/compiler-rt/test/tysan/violation-pr62544.c index 4187a91bde3fc..7c6cb59156d15 100644 --- a/compiler-rt/test/tysan/violation-pr62544.c +++ b/compiler-rt/test/tysan/violation-pr62544.c @@ -18,7 +18,7 @@ int main() { // CHECK: TypeSanitizer: type-aliasing-violation on address // CHECK-NEXT: WRITE of size 2 at {{.+}} with type short accesses an existing object of type int -// CHECK-NEXT: in main violation-pr62544.c:22 +// CHECK-NEXT: in main {{.*violation-pr62544.c:22.*}} *e = 3; printf("%d\n", a); } diff --git a/compiler-rt/test/tysan/violation-pr62828.cpp b/compiler-rt/test/tysan/violation-pr62828.cpp index 879200c8069b0..f815227d86248 100644 --- a/compiler-rt/test/tysan/violation-pr62828.cpp +++ b/compiler-rt/test/tysan/violation-pr62828.cpp @@ -24,7 +24,7 @@ short *test1(int_v8 *cast_c_array, short_v8 *shuf_c_array1, int *ptr) { // CHECK: ERROR: TypeSanitizer: type-aliasing-violation on address // CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing object of type int -// CHECK-NEXT:in test1(int (*) [8], short (*) [8], int*) violation-pr62828.cpp:29 +// CHECK-NEXT:in test1(int (*) [8], short (*) [8], int*) {{.*violation-pr62828.cpp:29.*}} for (int i3 = 0; i3 < 4; ++i3) { output2[i3] = input2[(i3 * 2)]; } diff --git a/compiler-rt/test/tysan/violation-pr68655.cpp b/compiler-rt/test/tysan/violation-pr68655.cpp index ac20f8c94e1ff..615971c75d20e 100644 --- a/compiler-rt/test/tysan/violation-pr68655.cpp +++ b/compiler-rt/test/tysan/violation-pr68655.cpp @@ -9,7 +9,7 @@ struct S1 { // CHECK: TypeSanitizer: type-aliasing-violation on address // CHECK-NEXT: READ of size 4 at {{.+}} with type int accesses an existing object of type long long (in S1 at offset 0) -// CHECK-NEXT: in copyMem(S1*, S1*) violation-pr68655.cpp:19 +// CHECK-NEXT: in copyMem(S1*, S1*) {{.*violation-pr68655.cpp:19.*}} void inline copyMem(S1 *dst, S1 *src) { unsigned *d = reinterpret_cast(dst); diff --git a/compiler-rt/test/tysan/violation-pr86685.c b/compiler-rt/test/tysan/violation-pr86685.c index b5198c440fa44..6667fc1805195 100644 --- a/compiler-rt/test/tysan/violation-pr86685.c +++ b/compiler-rt/test/tysan/violation-pr86685.c @@ -13,7 +13,7 @@ void foo(int *s, float *f, long n) { // CHECK: TypeSanitizer: type-aliasing-violation on address // CHECK-NEXT: WRITE of size 4 at {{.+}} with type int accesses an existing object of type float -// CHECK-NEXT: #0 {{.+}} in foo violation-pr86685.c:17 +// CHECK-NEXT: #0 {{.+}} in
[llvm-branch-commits] [compiler-rt] [TySan] Improved compatability for tests (PR #96507)
gbMattN wrote: @fhahn (Sorry for the ping, I can't manually add reviewers yet) https://github.com/llvm/llvm-project/pull/96507 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [TySan] Improved compatability for tests (PR #96507)
github-actions[bot] wrote: Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using `@` followed by their GitHub username. If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the [LLVM GitHub User Guide](https://llvm.org/docs/GitHub.html). You can also ask questions in a comment on this PR, on the [LLVM Discord](https://discord.com/invite/xS7Z362) or on the [forums](https://discourse.llvm.org/). https://github.com/llvm/llvm-project/pull/96507 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [TySan] Improved compatability for tests (PR #96507)
https://github.com/gbMattN created https://github.com/llvm/llvm-project/pull/96507 All the violation tests failed when running on my machine. By changing some check lines to regular expressions, we can account for file locality, and hardware specific type differences. There was also an include needed for better integer types. >From 712aa7fd384a773dad87a8a2f7a32b6a593c3b35 Mon Sep 17 00:00:00 2001 From: Matthew Nagy Date: Mon, 24 Jun 2024 15:07:01 + Subject: [PATCH] [TySan] Improved compatability for tests --- compiler-rt/test/tysan/violation-pr45282.c | 2 +- compiler-rt/test/tysan/violation-pr47137.c | 5 +++-- compiler-rt/test/tysan/violation-pr62544.c | 2 +- compiler-rt/test/tysan/violation-pr62828.cpp | 2 +- compiler-rt/test/tysan/violation-pr68655.cpp | 2 +- compiler-rt/test/tysan/violation-pr86685.c | 2 +- 6 files changed, 8 insertions(+), 7 deletions(-) diff --git a/compiler-rt/test/tysan/violation-pr45282.c b/compiler-rt/test/tysan/violation-pr45282.c index 2cbc37b3d1832..37e710ae18deb 100644 --- a/compiler-rt/test/tysan/violation-pr45282.c +++ b/compiler-rt/test/tysan/violation-pr45282.c @@ -18,7 +18,7 @@ int main(void) { // CHECK: TypeSanitizer: type-aliasing-violation on address // CHECK-NEXT: WRITE of size 8 at {{.+}} with type double accesses an existing object of type float -// CHECK-NEXT: in main violation-pr45282.c:25 +// CHECK-NEXT: in main {{.*violation-pr45282.c:25.*}} // loop of problems for (j = 2; j <= 4; ++j) { diff --git a/compiler-rt/test/tysan/violation-pr47137.c b/compiler-rt/test/tysan/violation-pr47137.c index 04d68d1dd936e..7b996d39f81e2 100644 --- a/compiler-rt/test/tysan/violation-pr47137.c +++ b/compiler-rt/test/tysan/violation-pr47137.c @@ -4,6 +4,7 @@ // https://github.com/llvm/llvm-project/issues/47137 #include #include +#include void f(int m) { int n = (4 * m + 2) / 3; @@ -23,8 +24,8 @@ void f(int m) { } // CHECK: TypeSanitizer: type-aliasing-violation on address -// CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing object of type long long -// CHECK-NEXT:in f violation-pr47137.c:30 +// CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing object of type {{(long)+}} +// CHECK-NEXT:in f {{.*violation-pr47137.c:31.*}} for (int i = 0, j = 0; j < 4 * m; i += 4, j += 3) { for (int k = 0; k < 3; k++) { ((uint16_t *)a)[j + k] = ((uint16_t *)a)[i + k]; diff --git a/compiler-rt/test/tysan/violation-pr62544.c b/compiler-rt/test/tysan/violation-pr62544.c index 4187a91bde3fc..7c6cb59156d15 100644 --- a/compiler-rt/test/tysan/violation-pr62544.c +++ b/compiler-rt/test/tysan/violation-pr62544.c @@ -18,7 +18,7 @@ int main() { // CHECK: TypeSanitizer: type-aliasing-violation on address // CHECK-NEXT: WRITE of size 2 at {{.+}} with type short accesses an existing object of type int -// CHECK-NEXT: in main violation-pr62544.c:22 +// CHECK-NEXT: in main {{.*violation-pr62544.c:22.*}} *e = 3; printf("%d\n", a); } diff --git a/compiler-rt/test/tysan/violation-pr62828.cpp b/compiler-rt/test/tysan/violation-pr62828.cpp index 879200c8069b0..f815227d86248 100644 --- a/compiler-rt/test/tysan/violation-pr62828.cpp +++ b/compiler-rt/test/tysan/violation-pr62828.cpp @@ -24,7 +24,7 @@ short *test1(int_v8 *cast_c_array, short_v8 *shuf_c_array1, int *ptr) { // CHECK: ERROR: TypeSanitizer: type-aliasing-violation on address // CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing object of type int -// CHECK-NEXT:in test1(int (*) [8], short (*) [8], int*) violation-pr62828.cpp:29 +// CHECK-NEXT:in test1(int (*) [8], short (*) [8], int*) {{.*violation-pr62828.cpp:29.*}} for (int i3 = 0; i3 < 4; ++i3) { output2[i3] = input2[(i3 * 2)]; } diff --git a/compiler-rt/test/tysan/violation-pr68655.cpp b/compiler-rt/test/tysan/violation-pr68655.cpp index ac20f8c94e1ff..615971c75d20e 100644 --- a/compiler-rt/test/tysan/violation-pr68655.cpp +++ b/compiler-rt/test/tysan/violation-pr68655.cpp @@ -9,7 +9,7 @@ struct S1 { // CHECK: TypeSanitizer: type-aliasing-violation on address // CHECK-NEXT: READ of size 4 at {{.+}} with type int accesses an existing object of type long long (in S1 at offset 0) -// CHECK-NEXT: in copyMem(S1*, S1*) violation-pr68655.cpp:19 +// CHECK-NEXT: in copyMem(S1*, S1*) {{.*violation-pr68655.cpp:19.*}} void inline copyMem(S1 *dst, S1 *src) { unsigned *d = reinterpret_cast(dst); diff --git a/compiler-rt/test/tysan/violation-pr86685.c b/compiler-rt/test/tysan/violation-pr86685.c index b5198c440fa44..6667fc1805195 100644 --- a/compiler-rt/test/tysan/violation-pr86685.c +++ b/compiler-rt/test/tysan/violation-pr86685.c @@ -13,7 +13,7 @@ void foo(int *s, float *f, long n) { // CHECK: TypeSanitizer: type-aliasing-violation on address // CHECK-NEXT: WRITE of size 4 at {{.+}} with type int accesses an existing object of type float -// CHECK-NEXT: #0 {{.+}} in
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From 80c3f71f03d3b2ccbcd418d76d417f2a243fdbe4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH 1/2] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 10 +- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 7 +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 0ec65f759bc35..028c54d8d94d2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFaddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasAtomicMemoryAtomicFaddF32DenormalSupport", @@ -1388,7 +1395,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 674d84422538f..922435c5efaa6 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicGlobalPkAddBF16Inst = false; bool HasAtomicBufferPkAddBF16Inst = false; bool HasFlatAtomicFaddF32Inst = false; + bool HasFlatBufferGlobalAtomicFaddF64Inst = false; bool HasDefaultComponentZero = false; bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false; bool HasDefaultComponentBroadcast = false; @@ -873,6 +874,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } + /// \return true if the target has flat, global, and buffer atomic fadd for + /// double. + bool hasFlatBufferGlobalAtomicFaddF64Inst() const { +return HasFlatBufferGlobalAtomicFaddF64Inst; + } + /// \return true if the target's flat, global, and buffer atomic fadd for /// float supports denormal handling. bool hasMemoryAtomicFaddF32DenormalSupport() const { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index eec750e5b8251..6b5ba160d6402 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16028,7 +16028,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { return AtomicExpansionKind::CmpXChg; // global and flat atomic fadd f64: gfx90a, gfx940. -if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy()) +if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy()) return ReportUnsafeHWInst(AtomicExpansionKind::None); if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) { >From c1354032fc55234ffddf9136f17f5ee400c01c16 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 24 Jun 2024 15:42:17 +0200 Subject: [PATCH 2/2] Add to gfx940 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 028c54d8d94d2..3ed68a259ca15 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1441,7 +1441,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFaddF32DenormalSupport + FeatureMemoryAtomicFaddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)
@@ -2638,4 +2638,41 @@ def WinogradConv2DOp : Op { + let description = [{ +Decompose winograd operators. It will convert filter, input and output +transform operators into a combination of scf, tensor, and linalg ftynse wrote: Nit: operations https://github.com/llvm/llvm-project/pull/96184 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)
@@ -2760,6 +2760,89 @@ LogicalResult WinogradFilterTransformOp::verify() { return success(); } +SmallVector +WinogradFilterTransformOp::getIterationDomain(OpBuilder ) { + Location loc = getLoc(); + Value zero = builder.create(loc, 0); + Value one = builder.create(loc, 1); + Value output = getOutput(); + SmallVector loopBounds(6); + for (unsigned dim = 0; dim < 6; ++dim) { +loopBounds[dim].offset = zero; +loopBounds[dim].size = getDimValue(builder, loc, output, dim); +loopBounds[dim].stride = one; + } + return loopBounds; +} + +SmallVector +WinogradFilterTransformOp::getLoopIteratorTypes() { + SmallVector iteratorTypes(6, + utils::IteratorType::parallel); + return iteratorTypes; +} + +Value getValueFromOpFoldResult(OpFoldResult opFoldResult, OpBuilder , + Location loc) { + if (auto val = opFoldResult.dyn_cast()) { +return val; + } else if (auto attr = opFoldResult.dyn_cast()) { +auto intAttr = cast(attr); +return builder.create(loc, intAttr); + } + // This should never happen if OpFoldResult is correctly formed. ftynse wrote: Then this should be an assertion. https://github.com/llvm/llvm-project/pull/96184 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)
@@ -2760,6 +2760,89 @@ LogicalResult WinogradFilterTransformOp::verify() { return success(); } +SmallVector +WinogradFilterTransformOp::getIterationDomain(OpBuilder ) { + Location loc = getLoc(); + Value zero = builder.create(loc, 0); + Value one = builder.create(loc, 1); ftynse wrote: IIRC, `Range` contains list of `OpFoldResult`, meaning we can put attributes there and not materialize operations for these constants. https://github.com/llvm/llvm-project/pull/96184 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)
@@ -2760,6 +2760,89 @@ LogicalResult WinogradFilterTransformOp::verify() { return success(); } +SmallVector +WinogradFilterTransformOp::getIterationDomain(OpBuilder ) { + Location loc = getLoc(); + Value zero = builder.create(loc, 0); + Value one = builder.create(loc, 1); + Value output = getOutput(); + SmallVector loopBounds(6); + for (unsigned dim = 0; dim < 6; ++dim) { +loopBounds[dim].offset = zero; +loopBounds[dim].size = getDimValue(builder, loc, output, dim); +loopBounds[dim].stride = one; + } + return loopBounds; +} + +SmallVector +WinogradFilterTransformOp::getLoopIteratorTypes() { + SmallVector iteratorTypes(6, + utils::IteratorType::parallel); + return iteratorTypes; +} + +Value getValueFromOpFoldResult(OpFoldResult opFoldResult, OpBuilder , + Location loc) { + if (auto val = opFoldResult.dyn_cast()) { +return val; + } else if (auto attr = opFoldResult.dyn_cast()) { +auto intAttr = cast(attr); +return builder.create(loc, intAttr); + } ftynse wrote: I suspect this might already exist somewhere in the arith dialect. https://github.com/llvm/llvm-project/pull/96184 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)
https://github.com/ftynse commented: I think @MaheshRavishankar should take a look at the interface implementation details. https://github.com/llvm/llvm-project/pull/96184 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)
https://github.com/ftynse edited https://github.com/llvm/llvm-project/pull/96184 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)
@@ -36,6 +189,92 @@ constexpr TransformMapKeyTy F_2_3{2, 3}; constexpr TransformMapKeyTy F_4_3{4, 3}; constexpr TransformMapKeyTy F_2_5{2, 5}; +struct TransformMatrix { + TransformMatrix(const float *table, int64_t rows, int64_t cols, + int64_t scalarFactor = 1) + : table(table), rows(rows), cols(cols), scalarFactor(scalarFactor) {} + + const float *table; + int64_t rows; + int64_t cols; + int64_t scalarFactor; +}; + +Value create2DTransformMatrix(RewriterBase , Location loc, + TransformMatrix transform, Type type) { + ArrayRef const_vec(transform.table, transform.rows * transform.cols); ftynse wrote: Nit: camelBack ```suggestion ArrayRef constVec(transform.table, transform.rows * transform.cols); ``` https://github.com/llvm/llvm-project/pull/96183 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)
@@ -100,6 +594,161 @@ Value matrixMultiply(RewriterBase , Location loc, return expandOutput; } +// This function transforms the output. The data layout of the output is HWNF. +// The transformation matrix is 2-dimension. We need to extract H x W from +// HWNF first. We need to generate 2 levels of loops to iterate on N and F. +// After the transformation, we get +// +// scf.for %n = lo_n to hi_n step 1 +// scf.for %f = lo_f to hi_f step 1 +// %extracted = extract input from result +// %ret = linalg.matmul AT, %extracted +// %ret = linalg.matmul %ret, A +// %inserted = insert %ret into ret +// +Value outputTransform(RewriterBase , Location loc, Value value, + Value output, int64_t m, int64_t r, + bool leftTransform = true, bool rightTransform = true) { + // Map from (m, r) to AT transform matrix. + static const llvm::SmallDenseMap + ATMatrices = { + {F_2_3, TransformMatrix(AT_2x2_3x3, 2, 4)}, + {F_4_3, TransformMatrix(AT_4x4_3x3, 4, 6, 32)}, + {F_2_5, TransformMatrix(AT_2x2_5x5, 2, 6, 16)}, + }; + + // Map from (m, r) to A transform matrix. + static const llvm::SmallDenseMap + AMatrices = { + {F_2_3, TransformMatrix(A_2x2_3x3, 4, 2)}, + {F_4_3, TransformMatrix(A_4x4_3x3, 6, 4, 32)}, + {F_2_5, TransformMatrix(A_2x2_5x5, 6, 2, 16)}, + }; + + auto valueType = cast(value.getType()); + Type elementType = valueType.getElementType(); + auto valueShape = valueType.getShape(); // TileH, TileW, H, W, N, F + int64_t valueH = valueShape[2]; + int64_t valueW = valueShape[3]; + int64_t valueN = valueShape[4]; + int64_t valueF = valueShape[5]; + int64_t alphaH = leftTransform ? m + r - 1 : 1; + int64_t alphaW = rightTransform ? m + r - 1 : 1; + + if (valueH != alphaH && valueH != 1) +return Value(); + if (valueW != alphaW && valueW != 1) +return Value(); + + auto zeroIdx = rewriter.create(loc, 0); + auto nUpperBound = rewriter.create(loc, valueN); + auto fUpperBound = rewriter.create(loc, valueF); + auto oneStep = rewriter.create(loc, 1); + + auto outerForOp = + rewriter.create(loc, zeroIdx, nUpperBound, oneStep, output); + Block *outerForBody = outerForOp.getBody(); + rewriter.setInsertionPointToStart(outerForBody); + Value NIter = outerForBody->getArgument(0); + + auto innerForOp = rewriter.create( + loc, zeroIdx, fUpperBound, oneStep, outerForOp.getRegionIterArgs()[0]); + Block *innerForBody = innerForOp.getBody(); + rewriter.setInsertionPointToStart(innerForBody); + Value FIter = innerForBody->getArgument(0); ftynse wrote: FYI, there's a `mlir::scf::buildLoopNest` somewhere that may space you the boilerplate. https://github.com/llvm/llvm-project/pull/96183 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)
@@ -48,6 +287,261 @@ Value collapse2DData(RewriterBase , Location loc, Value data) { reassociation); } +// This function transforms the filter. The data layout of the filter is FHWC. +// The transformation matrix is 2-dimension. We need to extract H x W from +// FHWC first. We need to generate 2 levels of loops to iterate on F and C. +// After the transformation, we get +// +// scf.for %f = lo_f to hi_f step 1 +// scf.for %c = lo_c to hi_c step 1 +// %extracted = extract filter from filter +// %ret = linalg.matmul G, %extracted +// %ret = linalg.matmul %ret, GT +// %inserted = insert %ret into filter +// ftynse wrote: ```suggestion /// This function transforms the filter. The data layout of the filter is FHWC. /// The transformation matrix is 2-dimension. We need to extract H x W from /// FHWC first. We need to generate 2 levels of loops to iterate on F and C. /// After the transformation, we get /// /// scf.for %f = lo_f to hi_f step 1 /// scf.for %c = lo_c to hi_c step 1 /// %extracted = extract filter from filter /// %ret = linalg.matmul G, %extracted /// %ret = linalg.matmul %ret, GT /// %inserted = insert %ret into filter /// ``` https://github.com/llvm/llvm-project/pull/96183 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)
@@ -289,6 +938,123 @@ FailureOr winogradConv2DHelper(RewriterBase , return transformedOutput.getDefiningOp(); } +FailureOr +decomposeWinogradFilterTransformHelper(RewriterBase , + linalg::WinogradFilterTransformOp op) { + Location loc = op.getLoc(); + Value filter = op.getFilter(); + auto filterType = cast(filter.getType()); + auto filterShape = filterType.getShape(); + int64_t filterH = filterShape[1]; + int64_t filterW = filterShape[2]; + + // For F(m x 1, r x 1), we only need to do left side transform. + bool leftTransform = filterH != 1; + // For F(1 x m, 1 x r), we only need to do right side transform. + bool rightTransform = filterW != 1; + Value transformedFilter = + filterTransform(rewriter, loc, filter, op.getOutput(), op.getM(), + op.getR(), leftTransform, rightTransform); + if (!transformedFilter) +return failure(); + + rewriter.replaceOp(op, transformedFilter); + + return transformedFilter.getDefiningOp(); +} + +FailureOr +decomposeWinogradInputTransformHelper(RewriterBase , + linalg::WinogradInputTransformOp op) { + Location loc = op.getLoc(); + Value input = op.getInput(); + auto inputType = cast(input.getType()); + auto inputShape = inputType.getShape(); + int64_t inputH = inputShape[1]; + int64_t inputW = inputShape[2]; + + // For F(m x 1, r x 1), we only need to do left side transform. + bool leftTransform = inputH != 1; + // For F(1 x m, 1 x r), we only need to do right side transform. + bool rightTransform = inputW != 1; + Value transformedInput = + inputTransform(rewriter, loc, op.getInput(), op.getOutput(), op.getM(), + op.getR(), leftTransform, rightTransform); + if (!transformedInput) +return failure(); + + rewriter.replaceOp(op, transformedInput); + + return transformedInput.getDefiningOp(); +} + +FailureOr +decomposeWinogradOutputTransformHelper(RewriterBase , + linalg::WinogradOutputTransformOp op) { + Location loc = op.getLoc(); + Value value = op.getValue(); + auto valueType = cast(value.getType()); + auto valueShape = valueType.getShape(); + int64_t valueH = valueShape[2]; + int64_t valueW = valueShape[3]; + + // For F(m x 1, r x 1), we only need to do left side transform. + bool leftTransform = valueH != 1; + // For F(1 x m, 1 x r), we only need to do right side transform. + bool rightTransform = valueW != 1; + Value transformedOutput = + outputTransform(rewriter, loc, value, op.getOutput(), op.getM(), + op.getR(), leftTransform, rightTransform); + if (!transformedOutput) +return failure(); + + rewriter.replaceOp(op, transformedOutput); + + return transformedOutput.getDefiningOp(); +} + +class DecomposeWinogradFilterTransform final +: public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(linalg::WinogradFilterTransformOp op, +PatternRewriter ) const override { +if (failed(decomposeWinogradFilterTransformHelper(rewriter, op))) + return failure(); + +return success(); + } +}; + +class DecomposeWinogradInputTransform final +: public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(linalg::WinogradInputTransformOp op, +PatternRewriter ) const override { +if (failed(decomposeWinogradInputTransformHelper(rewriter, op))) + return failure(); + +return success(); ftynse wrote: ```suggestion return decomposeWinogradInputTransformHelper(rewriter, op); ``` https://github.com/llvm/llvm-project/pull/96183 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)
@@ -48,6 +287,261 @@ Value collapse2DData(RewriterBase , Location loc, Value data) { reassociation); } +// This function transforms the filter. The data layout of the filter is FHWC. +// The transformation matrix is 2-dimension. We need to extract H x W from +// FHWC first. We need to generate 2 levels of loops to iterate on F and C. +// After the transformation, we get +// +// scf.for %f = lo_f to hi_f step 1 +// scf.for %c = lo_c to hi_c step 1 +// %extracted = extract filter from filter +// %ret = linalg.matmul G, %extracted +// %ret = linalg.matmul %ret, GT +// %inserted = insert %ret into filter +// +Value filterTransform(RewriterBase , Location loc, Value filter, + Value retValue, int64_t m, int64_t r, + bool leftTransform = true, bool rightTransform = true) { + // Map from (m, r) to G transform matrix. + static const llvm::SmallDenseMap + GMatrices = { + {F_2_3, TransformMatrix(G_2x2_3x3, 4, 3)}, + {F_4_3, TransformMatrix(G_4x4_3x3, 6, 3)}, + {F_2_5, TransformMatrix(G_2x2_5x5, 6, 5)}, + }; + + // Map from (m, r) to GT transform matrix. + static const llvm::SmallDenseMap + GTMatrices = { + {F_2_3, TransformMatrix(GT_2x2_3x3, 3, 4)}, + {F_4_3, TransformMatrix(GT_4x4_3x3, 3, 6)}, + {F_2_5, TransformMatrix(GT_2x2_5x5, 5, 6)}, + }; + + auto filterType = cast(filter.getType()); + Type elementType = filterType.getElementType(); + auto filterShape = filterType.getShape(); // F, H, W, C + int64_t filterF = filterShape[0]; + int64_t filterH = filterShape[1]; + int64_t filterW = filterShape[2]; + int64_t filterC = filterShape[3]; + + if (filterH != r && filterH != 1) +return Value(); + if (filterW != r && filterW != 1) +return Value(); + + // Return shape is + auto zeroIdx = rewriter.create(loc, 0); + auto fUpperBound = rewriter.create(loc, filterF); + auto cUpperBound = rewriter.create(loc, filterC); + auto oneStep = rewriter.create(loc, 1); + auto outerForOp = + rewriter.create(loc, zeroIdx, fUpperBound, oneStep, retValue); + Block *outerForBody = outerForOp.getBody(); + rewriter.setInsertionPointToStart(outerForBody); + Value FIter = outerForBody->getArgument(0); ftynse wrote: There must be a function on `scf::ForOp` that returns the induction variable and avoids magic constant zero here. https://github.com/llvm/llvm-project/pull/96183 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)
@@ -48,6 +287,261 @@ Value collapse2DData(RewriterBase , Location loc, Value data) { reassociation); } +// This function transforms the filter. The data layout of the filter is FHWC. +// The transformation matrix is 2-dimension. We need to extract H x W from +// FHWC first. We need to generate 2 levels of loops to iterate on F and C. +// After the transformation, we get +// +// scf.for %f = lo_f to hi_f step 1 +// scf.for %c = lo_c to hi_c step 1 +// %extracted = extract filter from filter +// %ret = linalg.matmul G, %extracted +// %ret = linalg.matmul %ret, GT +// %inserted = insert %ret into filter +// +Value filterTransform(RewriterBase , Location loc, Value filter, + Value retValue, int64_t m, int64_t r, + bool leftTransform = true, bool rightTransform = true) { + // Map from (m, r) to G transform matrix. + static const llvm::SmallDenseMap + GMatrices = { + {F_2_3, TransformMatrix(G_2x2_3x3, 4, 3)}, + {F_4_3, TransformMatrix(G_4x4_3x3, 6, 3)}, + {F_2_5, TransformMatrix(G_2x2_5x5, 6, 5)}, + }; + + // Map from (m, r) to GT transform matrix. + static const llvm::SmallDenseMap + GTMatrices = { + {F_2_3, TransformMatrix(GT_2x2_3x3, 3, 4)}, + {F_4_3, TransformMatrix(GT_4x4_3x3, 3, 6)}, + {F_2_5, TransformMatrix(GT_2x2_5x5, 5, 6)}, + }; + + auto filterType = cast(filter.getType()); + Type elementType = filterType.getElementType(); + auto filterShape = filterType.getShape(); // F, H, W, C + int64_t filterF = filterShape[0]; + int64_t filterH = filterShape[1]; + int64_t filterW = filterShape[2]; + int64_t filterC = filterShape[3]; + + if (filterH != r && filterH != 1) +return Value(); + if (filterW != r && filterW != 1) +return Value(); + + // Return shape is + auto zeroIdx = rewriter.create(loc, 0); + auto fUpperBound = rewriter.create(loc, filterF); + auto cUpperBound = rewriter.create(loc, filterC); + auto oneStep = rewriter.create(loc, 1); + auto outerForOp = + rewriter.create(loc, zeroIdx, fUpperBound, oneStep, retValue); + Block *outerForBody = outerForOp.getBody(); + rewriter.setInsertionPointToStart(outerForBody); + Value FIter = outerForBody->getArgument(0); + + auto innerForOp = rewriter.create( + loc, zeroIdx, cUpperBound, oneStep, outerForOp.getRegionIterArgs()[0]); + Block *innerForBody = innerForOp.getBody(); + rewriter.setInsertionPointToStart(innerForBody); + Value CIter = innerForBody->getArgument(0); + + // Extract (H, W) from (F, H, W, C) + auto extractFilter = extract2DData( + rewriter, loc, filter, FIter, CIter, /*outLoopIdx=*/0, + /*inLoopIdx=*/3, /*heightIdx=*/1, /*widthIdx=*/2, /*srcSize=*/4); + + TransformMapKeyTy key = {m, r}; + int64_t retRows = 1; + Value matmulRetValue = extractFilter; + if (leftTransform) { +// Get constant transform matrix G +auto it = GMatrices.find(key); +if (it == GMatrices.end()) + return Value(); +const TransformMatrix = it->second; + +retRows = GMatrix.rows; +auto matmulType = RankedTensorType::get({retRows, filterW}, elementType); +auto init = rewriter.create(loc, matmulType.getShape(), + elementType); + +Value G = create2DTransformMatrix(rewriter, loc, GMatrix, elementType); ftynse wrote: I wonder if we rather want to provide these matrices as global memrefs instead of creating locally every time. Have you considered that? https://github.com/llvm/llvm-project/pull/96183 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)
@@ -323,5 +1089,12 @@ void populateWinogradConv2DPatterns(RewritePatternSet , int64_t m, patterns.insert(context, m, r); } +void populateDecomposeWinogradOpsPatterns(RewritePatternSet ) { + MLIRContext *context = patterns.getContext(); + patterns.insert(context); + patterns.insert(context); + patterns.insert(context); ftynse wrote: ```suggestion patterns.insert(context); ``` https://github.com/llvm/llvm-project/pull/96183 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)
@@ -100,6 +594,161 @@ Value matrixMultiply(RewriterBase , Location loc, return expandOutput; } +// This function transforms the output. The data layout of the output is HWNF. +// The transformation matrix is 2-dimension. We need to extract H x W from +// HWNF first. We need to generate 2 levels of loops to iterate on N and F. +// After the transformation, we get +// +// scf.for %n = lo_n to hi_n step 1 +// scf.for %f = lo_f to hi_f step 1 +// %extracted = extract input from result +// %ret = linalg.matmul AT, %extracted +// %ret = linalg.matmul %ret, A +// %inserted = insert %ret into ret +// +Value outputTransform(RewriterBase , Location loc, Value value, + Value output, int64_t m, int64_t r, + bool leftTransform = true, bool rightTransform = true) { + // Map from (m, r) to AT transform matrix. + static const llvm::SmallDenseMap + ATMatrices = { + {F_2_3, TransformMatrix(AT_2x2_3x3, 2, 4)}, + {F_4_3, TransformMatrix(AT_4x4_3x3, 4, 6, 32)}, + {F_2_5, TransformMatrix(AT_2x2_5x5, 2, 6, 16)}, + }; + + // Map from (m, r) to A transform matrix. + static const llvm::SmallDenseMap + AMatrices = { + {F_2_3, TransformMatrix(A_2x2_3x3, 4, 2)}, + {F_4_3, TransformMatrix(A_4x4_3x3, 6, 4, 32)}, + {F_2_5, TransformMatrix(A_2x2_5x5, 6, 2, 16)}, + }; + + auto valueType = cast(value.getType()); + Type elementType = valueType.getElementType(); + auto valueShape = valueType.getShape(); // TileH, TileW, H, W, N, F + int64_t valueH = valueShape[2]; + int64_t valueW = valueShape[3]; + int64_t valueN = valueShape[4]; + int64_t valueF = valueShape[5]; + int64_t alphaH = leftTransform ? m + r - 1 : 1; + int64_t alphaW = rightTransform ? m + r - 1 : 1; + + if (valueH != alphaH && valueH != 1) +return Value(); + if (valueW != alphaW && valueW != 1) +return Value(); + + auto zeroIdx = rewriter.create(loc, 0); + auto nUpperBound = rewriter.create(loc, valueN); + auto fUpperBound = rewriter.create(loc, valueF); + auto oneStep = rewriter.create(loc, 1); + + auto outerForOp = + rewriter.create(loc, zeroIdx, nUpperBound, oneStep, output); + Block *outerForBody = outerForOp.getBody(); + rewriter.setInsertionPointToStart(outerForBody); + Value NIter = outerForBody->getArgument(0); + + auto innerForOp = rewriter.create( + loc, zeroIdx, fUpperBound, oneStep, outerForOp.getRegionIterArgs()[0]); + Block *innerForBody = innerForOp.getBody(); + rewriter.setInsertionPointToStart(innerForBody); + Value FIter = innerForBody->getArgument(0); + + // Extract (H, W) from (1, 1, H, W, N, F) + auto extractValue = extract2DData( + rewriter, loc, value, NIter, FIter, /*outLoopIdx=*/4, + /*inLoopIdx=*/5, /*heightIdx=*/2, /*widthIdx=*/3, /*srcSize=*/6); + + TransformMapKeyTy key = {m, r}; + int64_t retRows = 1; + int64_t retCols = 1; + int64_t leftScalarFactor = 1; + int64_t rightScalarFactor = 1; + Value matmulRetValue = extractValue; + if (leftTransform) { +// Get constant transform matrix AT +auto it = ATMatrices.find(key); +if (it == ATMatrices.end()) + return Value(); +const TransformMatrix = it->second; + +leftScalarFactor = ATMatrix.scalarFactor; +retRows = ATMatrix.rows; +auto matmulType = RankedTensorType::get({retRows, valueW}, elementType); +auto init = rewriter.create(loc, matmulType.getShape(), + elementType); + +Value AT = create2DTransformMatrix(rewriter, loc, ATMatrix, elementType); +// Multiply AT x m +auto matmulOp = rewriter.create( +loc, matmulType, ValueRange{AT, matmulRetValue}, ValueRange{init}); +matmulRetValue = matmulOp.getResult(0); + } + + if (rightTransform) { +// Get constant transform matrix T +auto it = AMatrices.find(key); +if (it == AMatrices.end()) + return Value(); +const TransformMatrix = it->second; + +rightScalarFactor = AMatrix.scalarFactor; +auto matmulType = +RankedTensorType::get({retRows, AMatrix.cols}, elementType); +retCols = AMatrix.cols; +auto init = rewriter.create(loc, matmulType.getShape(), + elementType); + +Value A = create2DTransformMatrix(rewriter, loc, AMatrix, elementType); +// Multiply y = (AT x m) x A +auto matmulOp = rewriter.create( +loc, matmulType, ValueRange{matmulRetValue, A}, ValueRange{init}); +matmulRetValue = matmulOp.getResult(0); + } + + // Multiply scalar factor. + Value scalarFactor = rewriter.create( + loc, FloatAttr::get(elementType, leftScalarFactor * rightScalarFactor)); + auto matmulType = RankedTensorType::get({retRows, retCols}, elementType); + auto init = + rewriter.create(loc, matmulType.getShape(), elementType); + + auto identityAffineMap = rewriter.getMultiDimIdentityMap(2); + SmallVector affineMaps =
[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)
@@ -100,6 +594,161 @@ Value matrixMultiply(RewriterBase , Location loc, return expandOutput; } +// This function transforms the output. The data layout of the output is HWNF. +// The transformation matrix is 2-dimension. We need to extract H x W from +// HWNF first. We need to generate 2 levels of loops to iterate on N and F. +// After the transformation, we get +// +// scf.for %n = lo_n to hi_n step 1 +// scf.for %f = lo_f to hi_f step 1 +// %extracted = extract input from result +// %ret = linalg.matmul AT, %extracted +// %ret = linalg.matmul %ret, A +// %inserted = insert %ret into ret +// +Value outputTransform(RewriterBase , Location loc, Value value, + Value output, int64_t m, int64_t r, + bool leftTransform = true, bool rightTransform = true) { + // Map from (m, r) to AT transform matrix. + static const llvm::SmallDenseMap + ATMatrices = { + {F_2_3, TransformMatrix(AT_2x2_3x3, 2, 4)}, + {F_4_3, TransformMatrix(AT_4x4_3x3, 4, 6, 32)}, + {F_2_5, TransformMatrix(AT_2x2_5x5, 2, 6, 16)}, + }; + + // Map from (m, r) to A transform matrix. + static const llvm::SmallDenseMap + AMatrices = { + {F_2_3, TransformMatrix(A_2x2_3x3, 4, 2)}, + {F_4_3, TransformMatrix(A_4x4_3x3, 6, 4, 32)}, + {F_2_5, TransformMatrix(A_2x2_5x5, 6, 2, 16)}, + }; + + auto valueType = cast(value.getType()); + Type elementType = valueType.getElementType(); + auto valueShape = valueType.getShape(); // TileH, TileW, H, W, N, F + int64_t valueH = valueShape[2]; + int64_t valueW = valueShape[3]; + int64_t valueN = valueShape[4]; + int64_t valueF = valueShape[5]; + int64_t alphaH = leftTransform ? m + r - 1 : 1; + int64_t alphaW = rightTransform ? m + r - 1 : 1; + + if (valueH != alphaH && valueH != 1) +return Value(); + if (valueW != alphaW && valueW != 1) +return Value(); + + auto zeroIdx = rewriter.create(loc, 0); + auto nUpperBound = rewriter.create(loc, valueN); + auto fUpperBound = rewriter.create(loc, valueF); + auto oneStep = rewriter.create(loc, 1); + + auto outerForOp = + rewriter.create(loc, zeroIdx, nUpperBound, oneStep, output); + Block *outerForBody = outerForOp.getBody(); + rewriter.setInsertionPointToStart(outerForBody); + Value NIter = outerForBody->getArgument(0); + + auto innerForOp = rewriter.create( + loc, zeroIdx, fUpperBound, oneStep, outerForOp.getRegionIterArgs()[0]); + Block *innerForBody = innerForOp.getBody(); + rewriter.setInsertionPointToStart(innerForBody); + Value FIter = innerForBody->getArgument(0); + + // Extract (H, W) from (1, 1, H, W, N, F) + auto extractValue = extract2DData( + rewriter, loc, value, NIter, FIter, /*outLoopIdx=*/4, + /*inLoopIdx=*/5, /*heightIdx=*/2, /*widthIdx=*/3, /*srcSize=*/6); + + TransformMapKeyTy key = {m, r}; + int64_t retRows = 1; + int64_t retCols = 1; + int64_t leftScalarFactor = 1; + int64_t rightScalarFactor = 1; + Value matmulRetValue = extractValue; + if (leftTransform) { +// Get constant transform matrix AT +auto it = ATMatrices.find(key); +if (it == ATMatrices.end()) + return Value(); +const TransformMatrix = it->second; + +leftScalarFactor = ATMatrix.scalarFactor; +retRows = ATMatrix.rows; +auto matmulType = RankedTensorType::get({retRows, valueW}, elementType); +auto init = rewriter.create(loc, matmulType.getShape(), + elementType); + +Value AT = create2DTransformMatrix(rewriter, loc, ATMatrix, elementType); +// Multiply AT x m +auto matmulOp = rewriter.create( +loc, matmulType, ValueRange{AT, matmulRetValue}, ValueRange{init}); +matmulRetValue = matmulOp.getResult(0); + } + + if (rightTransform) { +// Get constant transform matrix T +auto it = AMatrices.find(key); +if (it == AMatrices.end()) + return Value(); +const TransformMatrix = it->second; + +rightScalarFactor = AMatrix.scalarFactor; +auto matmulType = +RankedTensorType::get({retRows, AMatrix.cols}, elementType); +retCols = AMatrix.cols; +auto init = rewriter.create(loc, matmulType.getShape(), + elementType); + +Value A = create2DTransformMatrix(rewriter, loc, AMatrix, elementType); +// Multiply y = (AT x m) x A +auto matmulOp = rewriter.create( +loc, matmulType, ValueRange{matmulRetValue, A}, ValueRange{init}); +matmulRetValue = matmulOp.getResult(0); + } + + // Multiply scalar factor. + Value scalarFactor = rewriter.create( + loc, FloatAttr::get(elementType, leftScalarFactor * rightScalarFactor)); + auto matmulType = RankedTensorType::get({retRows, retCols}, elementType); + auto init = + rewriter.create(loc, matmulType.getShape(), elementType); + + auto identityAffineMap = rewriter.getMultiDimIdentityMap(2); + SmallVector affineMaps =
[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)
@@ -48,6 +287,261 @@ Value collapse2DData(RewriterBase , Location loc, Value data) { reassociation); } +// This function transforms the filter. The data layout of the filter is FHWC. +// The transformation matrix is 2-dimension. We need to extract H x W from +// FHWC first. We need to generate 2 levels of loops to iterate on F and C. +// After the transformation, we get +// +// scf.for %f = lo_f to hi_f step 1 +// scf.for %c = lo_c to hi_c step 1 +// %extracted = extract filter from filter +// %ret = linalg.matmul G, %extracted +// %ret = linalg.matmul %ret, GT +// %inserted = insert %ret into filter +// +Value filterTransform(RewriterBase , Location loc, Value filter, + Value retValue, int64_t m, int64_t r, + bool leftTransform = true, bool rightTransform = true) { + // Map from (m, r) to G transform matrix. + static const llvm::SmallDenseMap + GMatrices = { + {F_2_3, TransformMatrix(G_2x2_3x3, 4, 3)}, + {F_4_3, TransformMatrix(G_4x4_3x3, 6, 3)}, + {F_2_5, TransformMatrix(G_2x2_5x5, 6, 5)}, + }; + + // Map from (m, r) to GT transform matrix. + static const llvm::SmallDenseMap + GTMatrices = { + {F_2_3, TransformMatrix(GT_2x2_3x3, 3, 4)}, + {F_4_3, TransformMatrix(GT_4x4_3x3, 3, 6)}, + {F_2_5, TransformMatrix(GT_2x2_5x5, 5, 6)}, + }; + + auto filterType = cast(filter.getType()); + Type elementType = filterType.getElementType(); + auto filterShape = filterType.getShape(); // F, H, W, C + int64_t filterF = filterShape[0]; + int64_t filterH = filterShape[1]; + int64_t filterW = filterShape[2]; + int64_t filterC = filterShape[3]; + + if (filterH != r && filterH != 1) +return Value(); + if (filterW != r && filterW != 1) +return Value(); + + // Return shape is + auto zeroIdx = rewriter.create(loc, 0); + auto fUpperBound = rewriter.create(loc, filterF); + auto cUpperBound = rewriter.create(loc, filterC); + auto oneStep = rewriter.create(loc, 1); + auto outerForOp = + rewriter.create(loc, zeroIdx, fUpperBound, oneStep, retValue); + Block *outerForBody = outerForOp.getBody(); + rewriter.setInsertionPointToStart(outerForBody); + Value FIter = outerForBody->getArgument(0); + + auto innerForOp = rewriter.create( + loc, zeroIdx, cUpperBound, oneStep, outerForOp.getRegionIterArgs()[0]); ftynse wrote: Ditto. there must be a better-named function for this. https://github.com/llvm/llvm-project/pull/96183 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)
@@ -36,6 +189,92 @@ constexpr TransformMapKeyTy F_2_3{2, 3}; constexpr TransformMapKeyTy F_4_3{4, 3}; constexpr TransformMapKeyTy F_2_5{2, 5}; +struct TransformMatrix { ftynse wrote: Please document top-level entities. https://github.com/llvm/llvm-project/pull/96183 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)
@@ -23,6 +26,156 @@ namespace linalg { namespace { +// clang-format off +// Winograd Conv2D uses a minimal 2D filtering algorithm to calculate its +// result. The formula of minimal 2D filtering algorithm F(m x m, r x r), +// m is the output dimension and r is the filter dimension, is +// +// Y = A^T x [ (G x g x G^T) x (B^T x d x B) ] x A +// +// g is filter and d is input data. We need to prepare 6 constant +// transformation matrices, G, G^T, B^T, B, A^T, and A for this formula. +// +// The following tables define these constant transformation matrices for +// F(2 x 2, 3 x 3), F(4 x 4, 3 x 3), and F(2 x 2, 5 x 5) +constexpr float G_2x2_3x3[] = { + -1, 0, 0, + 1./2, -1./2, 1./2, + 1./2, 1./2, 1./2, +0, 0,1 +}; + +constexpr float GT_2x2_3x3[] = { + -1, 1./2, 1./2, 0, +0, -1./2, 1./2, 0, +0, 1./2, 1./2, 1 +}; ftynse wrote: Have you considered introducing a (potentially `constexpr`) transpose function or some sort of transposed access iterator instead of hardcoding transposed matrices? https://github.com/llvm/llvm-project/pull/96183 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for global atomic fadd denormal support (PR #96443)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96443 >From f99d34b66486a17e2fe70d372d67fbabde82d5fb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 16:44:08 +0200 Subject: [PATCH 1/2] AMDGPU: Add subtarget feature for global atomic fadd denormal support Not sure what the behavior for gfx90a is. The SPG says it always flushes. The instruction documentation says it does not. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 13 +++-- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 7 +++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 7ff861f5b144d..5f798b4391704 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureMemoryAtomicFaddF32DenormalSupport + : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", + "HasAtomicMemoryAtomicFaddF32DenormalSupport", + "true", + "global/flat/buffer atomic fadd for float supports denormal handling" +>; + def FeatureAgentScopeFineGrainedRemoteMemoryAtomics : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics", "HasAgentScopeFineGrainedRemoteMemoryAtomics", @@ -1425,7 +1432,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureAgentScopeFineGrainedRemoteMemoryAtomics + FeatureAgentScopeFineGrainedRemoteMemoryAtomics, + FeatureMemoryAtomicFaddF32DenormalSupport ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1628,7 +1636,8 @@ def FeatureISAVersion12 : FeatureSet< FeatureVGPRSingleUseHintInsts, FeatureScalarDwordx3Loads, FeatureDPPSrc1SGPR, - FeatureMaxHardClauseLength32]>; + FeatureMaxHardClauseLength32, + FeatureMemoryAtomicFaddF32DenormalSupport]>; def FeatureISAVersion12_Generic: FeatureSet< !listconcat(FeatureISAVersion12.Features, diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index c40efbdcf7f0b..674d84422538f 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -167,6 +167,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicFlatPkAdd16Insts = false; bool HasAtomicFaddRtnInsts = false; bool HasAtomicFaddNoRtnInsts = false; + bool HasAtomicMemoryAtomicFaddF32DenormalSupport = false; bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false; bool HasAtomicBufferGlobalPkAddF16Insts = false; bool HasAtomicCSubNoRtnInsts = false; @@ -872,6 +873,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } + /// \return true if the target's flat, global, and buffer atomic fadd for + /// float supports denormal handling. + bool hasMemoryAtomicFaddF32DenormalSupport() const { +return HasAtomicMemoryAtomicFaddF32DenormalSupport; + } + /// \return true if atomic operations targeting fine-grained memory work /// correctly at device scope, in allocations in host or peer PCIe device /// memory. >From 45941357740eb44a7c5828bf7c534aef65448226 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 24 Jun 2024 12:10:37 +0200 Subject: [PATCH 2/2] Add to gfx11. RDNA 3 manual says "Floating-point addition handles NAN/INF/denorm" thought I'm not sure I trust it. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 5f798b4391704..0ec65f759bc35 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1545,7 +1545,8 @@ def FeatureISAVersion11_Common : FeatureSet< FeatureFlatAtomicFaddF32Inst, FeatureImageInsts, FeaturePackedTID, - FeatureVcmpxPermlaneHazard]>; + FeatureVcmpxPermlaneHazard, + FeatureMemoryAtomicFaddF32DenormalSupport]>; // There are few workarounds that need to be // added to all targets. This pessimizes codegen ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
arsenm wrote: > Actually not, we do not know the bus. Moreover, we know this is opposite. On gfx940/gfx12, we don't need to know the bus to handle the agent scope case correctly. The instructions still function, just always at device scope. We do need to know the bus and/or location to handle the system scope correctly, which will come from the new metadata. https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)
@@ -1701,17 +1732,33 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo , return AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM; } case S_LOAD_IMM: -switch (Width) { -default: - return 0; -case 2: - return AMDGPU::S_LOAD_DWORDX2_IMM; -case 3: - return AMDGPU::S_LOAD_DWORDX3_IMM; -case 4: - return AMDGPU::S_LOAD_DWORDX4_IMM; -case 8: - return AMDGPU::S_LOAD_DWORDX8_IMM; +// For targets that support XNACK replay, use the constrained load opcode. +if (STI && STI->hasXnackReplay()) { + switch (Width) { rampitec wrote: > > currently the alignment is picked from the first MMO and that'd definitely > > be smaller than the natural align requirement for the new load > > You don't know that - the alignment in the first MMO will be whatever > alignment the compiler could deduce, which could be large, e.g. if the > pointer used for the first load was known to have a large alignment. Moreover, it can easily be as large as a page. In a case of scalar load and kernarg. https://github.com/llvm/llvm-project/pull/96162 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
rampitec wrote: > We do statically know for some of the targets (mostly gfx12 and gfx940) that > it's supposed to work. This is the "scope downgrade" vs. "nop" cases in the > atomic support table Actually not, we do not know the bus. Moreover, we know this is opposite. https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)
@@ -1701,17 +1732,33 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo , return AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM; } case S_LOAD_IMM: -switch (Width) { -default: - return 0; -case 2: - return AMDGPU::S_LOAD_DWORDX2_IMM; -case 3: - return AMDGPU::S_LOAD_DWORDX3_IMM; -case 4: - return AMDGPU::S_LOAD_DWORDX4_IMM; -case 8: - return AMDGPU::S_LOAD_DWORDX8_IMM; +// For targets that support XNACK replay, use the constrained load opcode. +if (STI && STI->hasXnackReplay()) { + switch (Width) { cdevadas wrote: > > currently the alignment is picked from the first MMO and that'd definitely > > be smaller than the natural align requirement for the new load > > You don't know that - the alignment in the first MMO will be whatever > alignment the compiler could deduce, which could be large, e.g. if the > pointer used for the first load was known to have a large alignment. Are you suggesting to check the alignment in the first MMO and see if it is still the preferred alignment for the merge-load? Use the _ec if the alignment is found to be smaller than the expected value. https://github.com/llvm/llvm-project/pull/96162 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)
https://github.com/jayfoad edited https://github.com/llvm/llvm-project/pull/96162 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)
@@ -1701,17 +1732,33 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo , return AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM; } case S_LOAD_IMM: -switch (Width) { -default: - return 0; -case 2: - return AMDGPU::S_LOAD_DWORDX2_IMM; -case 3: - return AMDGPU::S_LOAD_DWORDX3_IMM; -case 4: - return AMDGPU::S_LOAD_DWORDX4_IMM; -case 8: - return AMDGPU::S_LOAD_DWORDX8_IMM; +// For targets that support XNACK replay, use the constrained load opcode. +if (STI && STI->hasXnackReplay()) { + switch (Width) { jayfoad wrote: > currently the alignment is picked from the first MMO and that'd definitely be > smaller than the natural align requirement for the new load You don't know that - the alignment in the first MMO will be whatever alignment the compiler could deduce, which could be large. https://github.com/llvm/llvm-project/pull/96162 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Codegen support for constrained multi-dword sloads (PR #96163)
@@ -867,13 +867,104 @@ def SMRDBufferImm : ComplexPattern; def SMRDBufferImm32 : ComplexPattern; def SMRDBufferSgprImm : ComplexPattern; +class SMRDAlignedLoadPat : PatFrag <(ops node:$ptr), (Op node:$ptr), [{ + // Returns true if it is a naturally aligned multi-dword load. + LoadSDNode *Ld = cast(N); + unsigned Size = Ld->getMemoryVT().getStoreSize(); + return (Size <= 4) || (Ld->getAlign().value() >= PowerOf2Ceil(Size)); jayfoad wrote: `Ld->getAlign().value()` will never be 12. There's no such thing as a non-power-of-two alignment. https://github.com/llvm/llvm-project/pull/96163 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits