[llvm-branch-commits] [BOLT] Function matching with function calls as anchors (PR #96596)

2024-06-24 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung created 
https://github.com/llvm/llvm-project/pull/96596



Test Plan: tbd



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)

2024-06-24 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95884

>From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Thu, 20 Jun 2024 23:42:00 -0700
Subject: [PATCH 1/7] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 73 --
 1 file changed, 56 insertions(+), 17 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 66cabc236f4b2..c9f6d88f0b13a 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
 
   // Uses name similarity to match functions that were not matched by name.
   uint64_t MatchedWithDemangledName = 0;
-  if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
-
-std::unordered_map NameToBinaryFunction;
-NameToBinaryFunction.reserve(BC.getBinaryFunctions().size());
 
-for (auto &[_, BF] : BC.getBinaryFunctions()) {
+  if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
+auto DemangleName = [&](const char* String) {
   int Status = 0;
-  char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(),
+  char *DemangledName = abi::__cxa_demangle(String,
 nullptr, nullptr, &Status);
-  if (Status == 0)
-NameToBinaryFunction[std::string(DemangledName)] = &BF;
+  return Status == 0 ? new std::string(DemangledName) : nullptr;
+};
+
+auto DeriveNameSpace = [&](std::string DemangledName) {
+  size_t LParen = std::string(DemangledName).find("(");
+  std::string FunctionName = std::string(DemangledName).substr(0, LParen);
+  size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::");
+  return ScopeResolutionOperator == std::string::npos ? std::string("") : 
std::string(DemangledName).substr(0, ScopeResolutionOperator);
+};
+
+std::unordered_map> 
NamespaceToBFs;
+NamespaceToBFs.reserve(BC.getBinaryFunctions().size());
+
+for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
+  std::string* DemangledName = 
DemangleName(BF->getOneName().str().c_str());
+  if (!DemangledName)
+continue;
+  std::string Namespace = DeriveNameSpace(*DemangledName);
+  auto It = NamespaceToBFs.find(Namespace);
+  if (It == NamespaceToBFs.end())
+NamespaceToBFs[Namespace] = {BF};
+  else
+It->second.push_back(BF);
 }
 
 for (auto YamlBF : YamlBP.Functions) {
   if (YamlBF.Used)
 continue;
-  int Status = 0;
-  char *DemangledName =
-  abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, &Status);
-  if (Status != 0)
+  std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str());
+  if (!YamlBFDemangledName)
 continue;
-  auto It = NameToBinaryFunction.find(DemangledName);
-  if (It == NameToBinaryFunction.end())
+  std::string Namespace = DeriveNameSpace(*YamlBFDemangledName);
+  auto It = NamespaceToBFs.find(Namespace);
+  if (It == NamespaceToBFs.end())
 continue;
-  BinaryFunction *BF = It->second;
-  matchProfileToFunction(YamlBF, *BF);
-  ++MatchedWithDemangledName;
+  std::vector BFs = It->second;
+
+  unsigned MinEditDistance = UINT_MAX;
+  BinaryFunction *ClosestNameBF = nullptr;
+
+  for (BinaryFunction *BF : BFs) {
+if (ProfiledFunctions.count(BF))
+  continue;
+std::string *BFDemangledName = 
DemangleName(BF->getOneName().str().c_str());
+if (!BFDemangledName)
+  continue;
+unsigned BFEditDistance = 
StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName);
+if (BFEditDistance < MinEditDistance) {
+  MinEditDistance = BFEditDistance;
+  ClosestNameBF = BF;
+}
+  }
+
+  if (ClosestNameBF &&
+MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
+matchProfileToFunction(YamlBF, *ClosestNameBF);
+++MatchedWithDemangledName;
+  }
 }
   }
 
+  outs() << MatchedWithDemangledName  << ": functions matched by name 
similarity\n";
+
   for (yaml::bolt::BinaryFunctionProfile  : YamlBP.Functions)
 if (!YamlBF.Used && opts::Verbosity >= 1)
   errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name

>From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Thu, 20 Jun 2024 23:45:27 -0700
Subject: [PATCH 2/7] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index c9f6d88f0b13a..cf4a5393df8f4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -491,8 +491,6 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
  

[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)

2024-06-24 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95884

>From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Thu, 20 Jun 2024 23:42:00 -0700
Subject: [PATCH 1/7] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 73 --
 1 file changed, 56 insertions(+), 17 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 66cabc236f4b2..c9f6d88f0b13a 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
 
   // Uses name similarity to match functions that were not matched by name.
   uint64_t MatchedWithDemangledName = 0;
-  if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
-
-std::unordered_map NameToBinaryFunction;
-NameToBinaryFunction.reserve(BC.getBinaryFunctions().size());
 
-for (auto &[_, BF] : BC.getBinaryFunctions()) {
+  if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
+auto DemangleName = [&](const char* String) {
   int Status = 0;
-  char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(),
+  char *DemangledName = abi::__cxa_demangle(String,
 nullptr, nullptr, &Status);
-  if (Status == 0)
-NameToBinaryFunction[std::string(DemangledName)] = &BF;
+  return Status == 0 ? new std::string(DemangledName) : nullptr;
+};
+
+auto DeriveNameSpace = [&](std::string DemangledName) {
+  size_t LParen = std::string(DemangledName).find("(");
+  std::string FunctionName = std::string(DemangledName).substr(0, LParen);
+  size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::");
+  return ScopeResolutionOperator == std::string::npos ? std::string("") : 
std::string(DemangledName).substr(0, ScopeResolutionOperator);
+};
+
+std::unordered_map> 
NamespaceToBFs;
+NamespaceToBFs.reserve(BC.getBinaryFunctions().size());
+
+for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
+  std::string* DemangledName = 
DemangleName(BF->getOneName().str().c_str());
+  if (!DemangledName)
+continue;
+  std::string Namespace = DeriveNameSpace(*DemangledName);
+  auto It = NamespaceToBFs.find(Namespace);
+  if (It == NamespaceToBFs.end())
+NamespaceToBFs[Namespace] = {BF};
+  else
+It->second.push_back(BF);
 }
 
 for (auto YamlBF : YamlBP.Functions) {
   if (YamlBF.Used)
 continue;
-  int Status = 0;
-  char *DemangledName =
-  abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, &Status);
-  if (Status != 0)
+  std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str());
+  if (!YamlBFDemangledName)
 continue;
-  auto It = NameToBinaryFunction.find(DemangledName);
-  if (It == NameToBinaryFunction.end())
+  std::string Namespace = DeriveNameSpace(*YamlBFDemangledName);
+  auto It = NamespaceToBFs.find(Namespace);
+  if (It == NamespaceToBFs.end())
 continue;
-  BinaryFunction *BF = It->second;
-  matchProfileToFunction(YamlBF, *BF);
-  ++MatchedWithDemangledName;
+  std::vector BFs = It->second;
+
+  unsigned MinEditDistance = UINT_MAX;
+  BinaryFunction *ClosestNameBF = nullptr;
+
+  for (BinaryFunction *BF : BFs) {
+if (ProfiledFunctions.count(BF))
+  continue;
+std::string *BFDemangledName = 
DemangleName(BF->getOneName().str().c_str());
+if (!BFDemangledName)
+  continue;
+unsigned BFEditDistance = 
StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName);
+if (BFEditDistance < MinEditDistance) {
+  MinEditDistance = BFEditDistance;
+  ClosestNameBF = BF;
+}
+  }
+
+  if (ClosestNameBF &&
+MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
+matchProfileToFunction(YamlBF, *ClosestNameBF);
+++MatchedWithDemangledName;
+  }
 }
   }
 
+  outs() << MatchedWithDemangledName  << ": functions matched by name 
similarity\n";
+
   for (yaml::bolt::BinaryFunctionProfile  : YamlBP.Functions)
 if (!YamlBF.Used && opts::Verbosity >= 1)
   errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name

>From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Thu, 20 Jun 2024 23:45:27 -0700
Subject: [PATCH 2/7] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index c9f6d88f0b13a..cf4a5393df8f4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -491,8 +491,6 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
  

[llvm-branch-commits] [libcxx] Add release note for #95264 (PR #96116)

2024-06-24 Thread Louis Dionne via llvm-branch-commits

ldionne wrote:

> We don't have a great way to add release notes after the final release. I 
> added this to the release announcement do you think that is enough?

Yes I think that is fine.

https://github.com/llvm/llvm-project/pull/96116
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] Add release note for #95264 (PR #96116)

2024-06-24 Thread Louis Dionne via llvm-branch-commits

https://github.com/ldionne closed 
https://github.com/llvm/llvm-project/pull/96116
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)

2024-06-24 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95884

>From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Thu, 20 Jun 2024 23:42:00 -0700
Subject: [PATCH 1/7] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 73 --
 1 file changed, 56 insertions(+), 17 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 66cabc236f4b2..c9f6d88f0b13a 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
 
   // Uses name similarity to match functions that were not matched by name.
   uint64_t MatchedWithDemangledName = 0;
-  if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
-
-std::unordered_map NameToBinaryFunction;
-NameToBinaryFunction.reserve(BC.getBinaryFunctions().size());
 
-for (auto &[_, BF] : BC.getBinaryFunctions()) {
+  if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
+auto DemangleName = [&](const char* String) {
   int Status = 0;
-  char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(),
+  char *DemangledName = abi::__cxa_demangle(String,
 nullptr, nullptr, &Status);
-  if (Status == 0)
-NameToBinaryFunction[std::string(DemangledName)] = &BF;
+  return Status == 0 ? new std::string(DemangledName) : nullptr;
+};
+
+auto DeriveNameSpace = [&](std::string DemangledName) {
+  size_t LParen = std::string(DemangledName).find("(");
+  std::string FunctionName = std::string(DemangledName).substr(0, LParen);
+  size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::");
+  return ScopeResolutionOperator == std::string::npos ? std::string("") : 
std::string(DemangledName).substr(0, ScopeResolutionOperator);
+};
+
+std::unordered_map> 
NamespaceToBFs;
+NamespaceToBFs.reserve(BC.getBinaryFunctions().size());
+
+for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
+  std::string* DemangledName = 
DemangleName(BF->getOneName().str().c_str());
+  if (!DemangledName)
+continue;
+  std::string Namespace = DeriveNameSpace(*DemangledName);
+  auto It = NamespaceToBFs.find(Namespace);
+  if (It == NamespaceToBFs.end())
+NamespaceToBFs[Namespace] = {BF};
+  else
+It->second.push_back(BF);
 }
 
 for (auto YamlBF : YamlBP.Functions) {
   if (YamlBF.Used)
 continue;
-  int Status = 0;
-  char *DemangledName =
-  abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, &Status);
-  if (Status != 0)
+  std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str());
+  if (!YamlBFDemangledName)
 continue;
-  auto It = NameToBinaryFunction.find(DemangledName);
-  if (It == NameToBinaryFunction.end())
+  std::string Namespace = DeriveNameSpace(*YamlBFDemangledName);
+  auto It = NamespaceToBFs.find(Namespace);
+  if (It == NamespaceToBFs.end())
 continue;
-  BinaryFunction *BF = It->second;
-  matchProfileToFunction(YamlBF, *BF);
-  ++MatchedWithDemangledName;
+  std::vector BFs = It->second;
+
+  unsigned MinEditDistance = UINT_MAX;
+  BinaryFunction *ClosestNameBF = nullptr;
+
+  for (BinaryFunction *BF : BFs) {
+if (ProfiledFunctions.count(BF))
+  continue;
+std::string *BFDemangledName = 
DemangleName(BF->getOneName().str().c_str());
+if (!BFDemangledName)
+  continue;
+unsigned BFEditDistance = 
StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName);
+if (BFEditDistance < MinEditDistance) {
+  MinEditDistance = BFEditDistance;
+  ClosestNameBF = BF;
+}
+  }
+
+  if (ClosestNameBF &&
+MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
+matchProfileToFunction(YamlBF, *ClosestNameBF);
+++MatchedWithDemangledName;
+  }
 }
   }
 
+  outs() << MatchedWithDemangledName  << ": functions matched by name 
similarity\n";
+
   for (yaml::bolt::BinaryFunctionProfile  : YamlBP.Functions)
 if (!YamlBF.Used && opts::Verbosity >= 1)
   errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name

>From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Thu, 20 Jun 2024 23:45:27 -0700
Subject: [PATCH 2/7] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index c9f6d88f0b13a..cf4a5393df8f4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -491,8 +491,6 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
  

[llvm-branch-commits] [libcxx] Add release note for #95264 (PR #96116)

2024-06-24 Thread Tom Stellard via llvm-branch-commits

tstellar wrote:

We don't have a great way to add release notes after the final release.  I 
added this to the release announcement; do you think that is enough?

https://github.com/llvm/llvm-project/pull/96116
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)

2024-06-24 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung converted_to_draft 
https://github.com/llvm/llvm-project/pull/95884
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [BOLT][NFC] Move opts::Lite to CommandLineOpts.cpp (PR #96571)

2024-06-24 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung closed 
https://github.com/llvm/llvm-project/pull/96571
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [BOLT][NFC] Move opts::Lite to CommandLineOpts.cpp (PR #96571)

2024-06-24 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-bolt

Author: shaw young (shawbyoung)


Changes



Test Plan: n/a


---
Full diff: https://github.com/llvm/llvm-project/pull/96571.diff


2 Files Affected:

- (modified) bolt/lib/Rewrite/RewriteInstance.cpp (-3) 
- (modified) bolt/lib/Utils/CommandLineOpts.cpp (+3) 


```diff
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp 
b/bolt/lib/Rewrite/RewriteInstance.cpp
index 1a3a8af21d81b..42c65fcd3ed97 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -140,9 +140,6 @@ KeepTmp("keep-tmp",
   cl::Hidden,
   cl::cat(BoltCategory));
 
-cl::opt Lite("lite", cl::desc("skip processing of cold functions"),
-   cl::cat(BoltCategory));
-
 static cl::opt
 LiteThresholdPct("lite-threshold-pct",
   cl::desc("threshold (in percent) for selecting functions to process in lite "
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp 
b/bolt/lib/Utils/CommandLineOpts.cpp
index 41c89bc8aeba4..b9bc79f408a6b 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -128,6 +128,9 @@ cl::opt
cl::desc("instrument code to generate accurate profile data"),
cl::cat(BoltOptCategory));
 
+cl::opt Lite("lite", cl::desc("skip processing of cold functions"),
+   cl::cat(BoltCategory));
+
 cl::opt
 OutputFilename("o",
   cl::desc(""),

```




https://github.com/llvm/llvm-project/pull/96571
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [BOLT][NFC] Move opts::Lite to CommandLineOpts.cpp (PR #96571)

2024-06-24 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung created 
https://github.com/llvm/llvm-project/pull/96571



Test Plan: n/a



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)

2024-06-24 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95821

>From 92212c96ea169d26ac10bf8d750539bc5dd72c49 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:02 -0700
Subject: [PATCH 01/20] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index f0fcb1c130002..2bca83c9d11ec 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,6 +421,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto& [_, BF] : BC.getBinaryFunctions()) {
+if (!ProfiledFunctions.count(&BF))
+  continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
 

>From 2497922ccc46e3189870563b1fe819b67172778d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:39 -0700
Subject: [PATCH 02/20] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 2bca83c9d11ec..56474a67307ed 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -417,10 +417,10 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
 
   // Uses the strict hash of profiled and binary functions to match functions
   // that are not matched by name or common name.
-  std::unordered_map StrictBinaryFunctionHashes;
+  std::unordered_map StrictBinaryFunctionHashes;
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
-  for (auto& [_, BF] : BC.getBinaryFunctions()) {
+  for (auto &[_, BF] : BC.getBinaryFunctions()) {
 if (!ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
@@ -428,7 +428,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
 
   for (auto YamlBF : YamlBP.Functions) {
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
-if (It != StrictBinaryFunctionHashes.end() && 
!ProfiledFunctions.count(It->second)) {
+if (It != StrictBinaryFunctionHashes.end() &&
+!ProfiledFunctions.count(It->second)) {
   auto *BF = It->second;
   matchProfileToFunction(YamlBF, *BF);
 }

>From 8e7b2229a69c3795e723404c56e0d4298eef412a Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:55:58 -0700
Subject: [PATCH 03/20] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp   | 2 +-
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 56474a67307ed..779d60bce3b66 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,7 +421,7 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto &[_, BF] : BC.getBinaryFunctions()) {
-if (!ProfiledFunctions.count(&BF))
+if (ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
diff --git a/bolt/test/X86/profile-passthrough-block.test 
b/bolt/test/X86/profile-passthrough-block.test
index 1b875885260dc..ed2a8117ddfc4 100644
--- a/bolt/test/X86/profile-passthrough-block.test
+++ b/bolt/test/X86/profile-passthrough-block.test
@@ -57,7 +57,7 @@ header:
 functions:
   - name:main
 fid: 0
-hash:0x
+hash:0x0001
 exec:1
 nblocks: 6
 blocks:

>From ef5f0dac9185dbb7a62345938d4f309c3379a85d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:58:22 -0700
Subject: [PATCH 04/20] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 779d60bce3b66..e3d30bfdb74e4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -427,6 +427,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
   }
 
   for (auto YamlBF : YamlBP.Functions) {
+if (YamlBF.Used)
+  continue;
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
 if (It != StrictBinaryFunctionHashes.end() &&
 !ProfiledFunctions.count(It->second)) {

>From 41ce2897a445e47dfe685da66b4af080824e78ed Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 16:00:27 -0700
Subject: [PATCH 05/20] spr amend

Created using spr 1.3.4
---
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 

[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)

2024-06-24 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95821

>From 92212c96ea169d26ac10bf8d750539bc5dd72c49 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:02 -0700
Subject: [PATCH 01/20] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index f0fcb1c130002..2bca83c9d11ec 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,6 +421,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto& [_, BF] : BC.getBinaryFunctions()) {
+if (!ProfiledFunctions.count(&BF))
+  continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
 

>From 2497922ccc46e3189870563b1fe819b67172778d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:39 -0700
Subject: [PATCH 02/20] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 2bca83c9d11ec..56474a67307ed 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -417,10 +417,10 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
 
   // Uses the strict hash of profiled and binary functions to match functions
   // that are not matched by name or common name.
-  std::unordered_map StrictBinaryFunctionHashes;
+  std::unordered_map StrictBinaryFunctionHashes;
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
-  for (auto& [_, BF] : BC.getBinaryFunctions()) {
+  for (auto &[_, BF] : BC.getBinaryFunctions()) {
 if (!ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
@@ -428,7 +428,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
 
   for (auto YamlBF : YamlBP.Functions) {
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
-if (It != StrictBinaryFunctionHashes.end() && 
!ProfiledFunctions.count(It->second)) {
+if (It != StrictBinaryFunctionHashes.end() &&
+!ProfiledFunctions.count(It->second)) {
   auto *BF = It->second;
   matchProfileToFunction(YamlBF, *BF);
 }

>From 8e7b2229a69c3795e723404c56e0d4298eef412a Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:55:58 -0700
Subject: [PATCH 03/20] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp   | 2 +-
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 56474a67307ed..779d60bce3b66 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,7 +421,7 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto &[_, BF] : BC.getBinaryFunctions()) {
-if (!ProfiledFunctions.count(&BF))
+if (ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
diff --git a/bolt/test/X86/profile-passthrough-block.test 
b/bolt/test/X86/profile-passthrough-block.test
index 1b875885260dc..ed2a8117ddfc4 100644
--- a/bolt/test/X86/profile-passthrough-block.test
+++ b/bolt/test/X86/profile-passthrough-block.test
@@ -57,7 +57,7 @@ header:
 functions:
   - name:main
 fid: 0
-hash:0x
+hash:0x0001
 exec:1
 nblocks: 6
 blocks:

>From ef5f0dac9185dbb7a62345938d4f309c3379a85d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:58:22 -0700
Subject: [PATCH 04/20] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 779d60bce3b66..e3d30bfdb74e4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -427,6 +427,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
   }
 
   for (auto YamlBF : YamlBP.Functions) {
+if (YamlBF.Used)
+  continue;
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
 if (It != StrictBinaryFunctionHashes.end() &&
 !ProfiledFunctions.count(It->second)) {

>From 41ce2897a445e47dfe685da66b4af080824e78ed Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 16:00:27 -0700
Subject: [PATCH 05/20] spr amend

Created using spr 1.3.4
---
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 

[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)

2024-06-24 Thread Stanislav Mekhanoshin via llvm-branch-commits

https://github.com/rampitec approved this pull request.


https://github.com/llvm/llvm-project/pull/96444
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)

2024-06-24 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/96444

>From baaf96125e8f913a161f1c13216618a3de128182 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sun, 23 Jun 2024 17:07:53 +0200
Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64

---
 llvm/lib/Target/AMDGPU/AMDGPU.td   | 21 ++---
 llvm/lib/Target/AMDGPU/BUFInstructions.td  | 10 ++
 llvm/lib/Target/AMDGPU/FLATInstructions.td |  6 +++---
 llvm/lib/Target/AMDGPU/GCNSubtarget.h  | 10 +++---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp  |  2 +-
 5 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 0ec65f759bc35..9aaeaf73287d5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst
   "Has flat_atomic_add_f32 instruction"
 >;
 
+def FeatureFlatBufferGlobalAtomicFaddF64Inst
+  : SubtargetFeature<"flat-buffer-global-fadd-f64-inst",
+  "HasFlatBufferGlobalAtomicFaddF64Inst",
+  "true",
+  "Has flat, buffer, and global instructions for f64 atomic fadd"
+>;
+
 def FeatureMemoryAtomicFaddF32DenormalSupport
   : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support",
   "HasAtomicMemoryAtomicFaddF32DenormalSupport",
@@ -1388,7 +1395,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
  FeatureBackOffBarrier,
  FeatureKernargPreload,
  FeatureAtomicFMinFMaxF64GlobalInsts,
- FeatureAtomicFMinFMaxF64FlatInsts
+ FeatureAtomicFMinFMaxF64FlatInsts,
+ FeatureFlatBufferGlobalAtomicFaddF64Inst
  ])>;
 
 def FeatureISAVersion9_0_C : FeatureSet<
@@ -1433,7 +1441,8 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF64FlatInsts,
FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
-   FeatureMemoryAtomicFaddF32DenormalSupport
+   FeatureMemoryAtomicFaddF32DenormalSupport,
+   FeatureFlatBufferGlobalAtomicFaddF64Inst
]>;
 
 def FeatureISAVersion9_4_0 : FeatureSet<
@@ -1928,11 +1937,9 @@ def isGFX12Plus :
 def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
   AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
 
-
-def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd
-  Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">,
-  // FIXME: This is too coarse, and working around using pseudo's predicates 
on real instruction.
-  AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, 
FeatureSouthernIslands, FeatureSeaIslands)>;
+def HasFlatBufferGlobalAtomicFaddF64Inst :
+  Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">,
+  AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>;
 
 def HasAtomicFMinFMaxF32GlobalInsts :
   Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">,
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td 
b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 3b8d94b744000..a904c8483dbf5 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in {
   }
 } // End SubtargetPredicate = isGFX90APlus
 
-let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
+let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
   defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", 
VReg_64, f64>;
+} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst
 
+let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
   // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2
   // depending on some subtargets.
   defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", 
VReg_64, f64>;
   defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", 
VReg_64, f64>;
-} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
+}
 
 def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> {
   let SubtargetPredicate = isGFX940Plus;
@@ -1836,9 +1838,9 @@ let SubtargetPredicate = 
HasAtomicBufferGlobalPkAddF16Insts in {
   defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, 
"BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>;
 } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts
 
-let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
+let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
   defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, 
"BUFFER_ATOMIC_ADD_F64">;
-} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
+} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst
 
 let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
   defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, 
"BUFFER_ATOMIC_MIN_F64">;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td 
b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 98054dde398b3..89946a4719557 100644
--- 

[llvm-branch-commits] Reapply "[llvm][RISCV] Enable trailing fences for seq-cst stores by default (#87376)" (PR #90267)

2024-06-24 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/90267


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] Reapply "[llvm][RISCV] Enable trailing fences for seq-cst stores by default (#87376)" (PR #90267)

2024-06-24 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi edited 
https://github.com/llvm/llvm-project/pull/90267
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] Reapply "[llvm][RISCV] Enable trailing fences for seq-cst stores by default (#87376)" (PR #90267)

2024-06-24 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/90267


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] Add release note for #95264 (PR #96116)

2024-06-24 Thread Louis Dionne via llvm-branch-commits

ldionne wrote:

Gentle ping @tstellar, do we want to merge this?

The CI issues are unrelated.

https://github.com/llvm/llvm-project/pull/96116
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for global atomic fadd denormal support (PR #96443)

2024-06-24 Thread Stanislav Mekhanoshin via llvm-branch-commits

https://github.com/rampitec approved this pull request.


https://github.com/llvm/llvm-project/pull/96443
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] 0f6f6dd - Revert "[Flang][Driver] Add -print-resource-dir command line flag to emit Fla…"

2024-06-24 Thread via llvm-branch-commits

Author: David Truby
Date: 2024-06-24T21:53:39+01:00
New Revision: 0f6f6ddbc0d84d2df23df8c8a771ace3c0dca988

URL: 
https://github.com/llvm/llvm-project/commit/0f6f6ddbc0d84d2df23df8c8a771ace3c0dca988
DIFF: 
https://github.com/llvm/llvm-project/commit/0f6f6ddbc0d84d2df23df8c8a771ace3c0dca988.diff

LOG: Revert "[Flang][Driver] Add -print-resource-dir command line flag to emit 
Fla…"

This reverts commit 2df06e42d733a1f7a1cdf715894921a5bbbc2956.

Added: 


Modified: 
clang/include/clang/Driver/Driver.h
clang/include/clang/Driver/Options.td
clang/lib/Driver/Driver.cpp

Removed: 
flang/test/Driver/print-resource-dir.F90



diff  --git a/clang/include/clang/Driver/Driver.h 
b/clang/include/clang/Driver/Driver.h
index 084c3ffe69ae8..cc1538372d5f8 100644
--- a/clang/include/clang/Driver/Driver.h
+++ b/clang/include/clang/Driver/Driver.h
@@ -747,9 +747,6 @@ class Driver {
   /// option.
   void setDriverMode(StringRef DriverModeValue);
 
-  /// Set the resource directory, depending on which driver is being used.
-  void setResourceDirectory();
-
   /// Parse the \p Args list for LTO options and record the type of LTO
   /// compilation based on which -f(no-)?lto(=.*)? option occurs last.
   void setLTOMode(const llvm::opt::ArgList );

diff  --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 6416261077ed1..dfdf0741e28eb 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5535,10 +5535,7 @@ def print_prog_name_EQ : Joined<["-", "--"], 
"print-prog-name=">,
   Visibility<[ClangOption, CLOption]>;
 def print_resource_dir : Flag<["-", "--"], "print-resource-dir">,
   HelpText<"Print the resource directory pathname">,
-  HelpTextForVariants<[FlangOption],
-  "Print the resource directory pathname that contains lib 
and "
-  "include directories with the runtime libraries and 
MODULE files.">,
-  Visibility<[ClangOption, CLOption, FlangOption]>;
+  Visibility<[ClangOption, CLOption]>;
 def print_search_dirs : Flag<["-", "--"], "print-search-dirs">,
   HelpText<"Print the paths used for finding libraries and programs">,
   Visibility<[ClangOption, CLOption]>;

diff  --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 0298d22203d9d..33ab7cc3f3968 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -229,6 +229,9 @@ Driver::Driver(StringRef ClangExecutable, StringRef 
TargetTriple,
 UserConfigDir = static_cast(P);
   }
 #endif
+
+  // Compute the path to the resource directory.
+  ResourceDir = GetResourcesPath(ClangExecutable, CLANG_RESOURCE_DIR);
 }
 
 void Driver::setDriverMode(StringRef Value) {
@@ -247,24 +250,6 @@ void Driver::setDriverMode(StringRef Value) {
 Diag(diag::err_drv_unsupported_option_argument) << OptName << Value;
 }
 
-void Driver::setResourceDirectory() {
-  // Compute the path to the resource directory, depending on the driver mode.
-  switch (Mode) {
-  case GCCMode:
-  case GXXMode:
-  case CPPMode:
-  case CLMode:
-  case DXCMode:
-ResourceDir = GetResourcesPath(ClangExecutable, CLANG_RESOURCE_DIR);
-break;
-  case FlangMode:
-SmallString<64> customResourcePathRelativeToDriver{".."};
-ResourceDir =
-GetResourcesPath(ClangExecutable, customResourcePathRelativeToDriver);
-break;
-  }
-}
-
 InputArgList Driver::ParseArgStrings(ArrayRef ArgStrings,
  bool UseDriverMode, bool ) {
   llvm::PrettyStackTraceString CrashInfo("Command line argument parsing");
@@ -1217,7 +1202,6 @@ Compilation *Driver::BuildCompilation(ArrayRef ArgList) {
   if (!DriverMode.empty())
 setDriverMode(DriverMode);
 
-  setResourceDirectory();
   // FIXME: What are we going to do with -V and -b?
 
   // Arguments specified in command line.

diff  --git a/flang/test/Driver/print-resource-dir.F90 
b/flang/test/Driver/print-resource-dir.F90
deleted file mode 100644
index 8fd35f1800df2..0
--- a/flang/test/Driver/print-resource-dir.F90
+++ /dev/null
@@ -1,4 +0,0 @@
-! DEFINE: %{resource_dir} = %S/Inputs/resource_dir
-! RUN: %flang -print-resource-dir -resource-dir=%{resource_dir}.. \
-! RUN:  | FileCheck -check-prefix=PRINT-RESOURCE-DIR -DFILE=%{resource_dir} %s
-! PRINT-RESOURCE-DIR: [[FILE]]



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for global atomic fadd denormal support (PR #96443)

2024-06-24 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> It is worse than that. It behaves differently depending on where atomic is 
> executed. There is no single answer if this instruction supports denorms or 
> not.

That doesn't matter. The flat case that sometimes flushes is just a no. 
Flushing is never a guarantee; we only need to know a flush may happen.

https://github.com/llvm/llvm-project/pull/96443
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)

2024-06-24 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95821

>From 92212c96ea169d26ac10bf8d750539bc5dd72c49 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:02 -0700
Subject: [PATCH 01/20] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index f0fcb1c130002..2bca83c9d11ec 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,6 +421,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto& [_, BF] : BC.getBinaryFunctions()) {
+if (!ProfiledFunctions.count())
+  continue;
 StrictBinaryFunctionHashes[BF.getHash()] = 
   }
 

>From 2497922ccc46e3189870563b1fe819b67172778d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:39 -0700
Subject: [PATCH 02/20] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 2bca83c9d11ec..56474a67307ed 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -417,10 +417,10 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
 
   // Uses the strict hash of profiled and binary functions to match functions
   // that are not matched by name or common name.
-  std::unordered_map StrictBinaryFunctionHashes;
+  std::unordered_map StrictBinaryFunctionHashes;
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
-  for (auto& [_, BF] : BC.getBinaryFunctions()) {
+  for (auto &[_, BF] : BC.getBinaryFunctions()) {
 if (!ProfiledFunctions.count())
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = 
@@ -428,7 +428,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
 
   for (auto YamlBF : YamlBP.Functions) {
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
-if (It != StrictBinaryFunctionHashes.end() && 
!ProfiledFunctions.count(It->second)) {
+if (It != StrictBinaryFunctionHashes.end() &&
+!ProfiledFunctions.count(It->second)) {
   auto *BF = It->second;
   matchProfileToFunction(YamlBF, *BF);
 }

>From 8e7b2229a69c3795e723404c56e0d4298eef412a Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:55:58 -0700
Subject: [PATCH 03/20] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp   | 2 +-
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 56474a67307ed..779d60bce3b66 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,7 +421,7 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto &[_, BF] : BC.getBinaryFunctions()) {
-if (!ProfiledFunctions.count())
+if (ProfiledFunctions.count())
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = 
   }
diff --git a/bolt/test/X86/profile-passthrough-block.test 
b/bolt/test/X86/profile-passthrough-block.test
index 1b875885260dc..ed2a8117ddfc4 100644
--- a/bolt/test/X86/profile-passthrough-block.test
+++ b/bolt/test/X86/profile-passthrough-block.test
@@ -57,7 +57,7 @@ header:
 functions:
   - name:main
 fid: 0
-hash:0x
+hash:0x0001
 exec:1
 nblocks: 6
 blocks:

>From ef5f0dac9185dbb7a62345938d4f309c3379a85d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:58:22 -0700
Subject: [PATCH 04/20] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 779d60bce3b66..e3d30bfdb74e4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -427,6 +427,8 @@ Error YAMLProfileReader::readProfile(BinaryContext ) {
   }
 
   for (auto YamlBF : YamlBP.Functions) {
+if (YamlBF.Used)
+  continue;
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
 if (It != StrictBinaryFunctionHashes.end() &&
 !ProfiledFunctions.count(It->second)) {

>From 41ce2897a445e47dfe685da66b4af080824e78ed Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 16:00:27 -0700
Subject: [PATCH 05/20] spr amend

Created using spr 1.3.4
---
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 

[llvm-branch-commits] [mlir] [mlir][Transforms][NFC] Dialect Conversion: Move argument materialization logic (PR #96329)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits

https://github.com/ftynse approved this pull request.


https://github.com/llvm/llvm-project/pull/96329
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [sanitizer] Rename DEFINE_REAL_PTHREAD_FUNCTIONS (PR #96527)

2024-06-24 Thread Florian Mayer via llvm-branch-commits

https://github.com/fmayer approved this pull request.


https://github.com/llvm/llvm-project/pull/96527
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)

2024-06-24 Thread Yaxun Liu via llvm-branch-commits


@@ -788,6 +788,14 @@ def FeatureFlatAtomicFaddF32Inst
   "Has flat_atomic_add_f32 instruction"
 >;
 
+def FeatureAgentScopeFineGrainedRemoteMemoryAtomics
+  : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics",
+  "HasAgentScopeFineGrainedRemoteMemoryAtomics",
+  "true",
+  "Agent (device) scoped atomic operations not directly supported by "

yxsamliu wrote:

I feel the description is a little bit confusing, at least for me.

how about

"Agent (device) scoped atomic operations, excluding those directly supported by 
PCIe (i.e., integer atomic add, exchange, and compare-and-swap), are functional 
for allocations in host or peer PCIe device memory."

https://github.com/llvm/llvm-project/pull/96442
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [sanitizer] Rename DEFINE_REAL_PTHREAD_FUNCTIONS (PR #96527)

2024-06-24 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka edited 
https://github.com/llvm/llvm-project/pull/96527
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [sanitizer] Rename DEFINE_REAL_PTHREAD_FUNCTIONS (PR #96527)

2024-06-24 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-pgo

Author: Vitaly Buka (vitalybuka)


Changes

We use REAL() calls in interceptors, but
DEFINE_REAL_PTHREAD_FUNCTIONS has nothing to do
with them and is only used for internal maintenance
threads.


---
Full diff: https://github.com/llvm/llvm-project/pull/96527.diff


9 Files Affected:

- (modified) compiler-rt/lib/asan/asan_interceptors.cpp (+1-1) 
- (modified) compiler-rt/lib/hwasan/hwasan_interceptors.cpp (+1-1) 
- (modified) compiler-rt/lib/lsan/lsan_interceptors.cpp (+1-1) 
- (modified) compiler-rt/lib/memprof/memprof_interceptors.cpp (+1-1) 
- (modified) compiler-rt/lib/msan/msan_interceptors.cpp (+1-1) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp 
(+2-2) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp (+4-4) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_posix.h (+14-14) 
- (modified) compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp (+4-4) 


``diff
diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp 
b/compiler-rt/lib/asan/asan_interceptors.cpp
index 6d1360e104975..f8f86a766b204 100644
--- a/compiler-rt/lib/asan/asan_interceptors.cpp
+++ b/compiler-rt/lib/asan/asan_interceptors.cpp
@@ -333,7 +333,7 @@ INTERCEPTOR(int, pthread_timedjoin_np, void *thread, void 
**ret,
 }
 #endif
 
-DEFINE_REAL_PTHREAD_FUNCTIONS
+DEFINE_INTERNAL_PTHREAD_FUNCTIONS
 #endif  // ASAN_INTERCEPT_PTHREAD_CREATE
 
 #if ASAN_INTERCEPT_SWAPCONTEXT
diff --git a/compiler-rt/lib/hwasan/hwasan_interceptors.cpp 
b/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
index 08ae435b8214a..c10b5c158548e 100644
--- a/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
@@ -334,7 +334,7 @@ INTERCEPTOR(int, pthread_timedjoin_np, void *thread, void 
**ret,
 }
 #endif
 
-DEFINE_REAL_PTHREAD_FUNCTIONS
+DEFINE_INTERNAL_PTHREAD_FUNCTIONS
 
 DEFINE_REAL(int, vfork,)
 DECLARE_EXTERN_INTERCEPTOR_AND_WRAPPER(int, vfork,)
diff --git a/compiler-rt/lib/lsan/lsan_interceptors.cpp 
b/compiler-rt/lib/lsan/lsan_interceptors.cpp
index 1fd0010f9ea93..6df4b6865b379 100644
--- a/compiler-rt/lib/lsan/lsan_interceptors.cpp
+++ b/compiler-rt/lib/lsan/lsan_interceptors.cpp
@@ -525,7 +525,7 @@ INTERCEPTOR(int, pthread_timedjoin_np, void *thread, void 
**ret,
 #define LSAN_MAYBE_INTERCEPT_TIMEDJOIN
 #  endif  // SANITIZER_INTERCEPT_TIMEDJOIN
 
-DEFINE_REAL_PTHREAD_FUNCTIONS
+DEFINE_INTERNAL_PTHREAD_FUNCTIONS
 
 INTERCEPTOR(void, _exit, int status) {
   if (status == 0 && HasReportedLeaks()) status = common_flags()->exitcode;
diff --git a/compiler-rt/lib/memprof/memprof_interceptors.cpp 
b/compiler-rt/lib/memprof/memprof_interceptors.cpp
index a267f6d3d6717..53ee4e953419b 100644
--- a/compiler-rt/lib/memprof/memprof_interceptors.cpp
+++ b/compiler-rt/lib/memprof/memprof_interceptors.cpp
@@ -166,7 +166,7 @@ INTERCEPTOR(int, pthread_join, void *t, void **arg) {
   return REAL(pthread_join)(t, arg);
 }
 
-DEFINE_REAL_PTHREAD_FUNCTIONS
+DEFINE_INTERNAL_PTHREAD_FUNCTIONS
 
 INTERCEPTOR(char *, index, const char *string, int c)
 ALIAS(WRAP(strchr));
diff --git a/compiler-rt/lib/msan/msan_interceptors.cpp 
b/compiler-rt/lib/msan/msan_interceptors.cpp
index 9abf240633258..789b739b41189 100644
--- a/compiler-rt/lib/msan/msan_interceptors.cpp
+++ b/compiler-rt/lib/msan/msan_interceptors.cpp
@@ -1226,7 +1226,7 @@ INTERCEPTOR(int, pthread_timedjoin_np, void *thread, void 
**retval,
 }
 #endif
 
-DEFINE_REAL_PTHREAD_FUNCTIONS
+DEFINE_INTERNAL_PTHREAD_FUNCTIONS
 
 extern char *tzname[2];
 
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp
index 7b74bb1a7e0f3..a174ae7be991d 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp
@@ -87,8 +87,8 @@ void MaybeStartBackgroudThread() {
   if (!common_flags()->hard_rss_limit_mb &&
   !common_flags()->soft_rss_limit_mb &&
   !common_flags()->heap_profile) return;
-  if (!_pthread_create) {
-VPrintf(1, "%s: real_pthread_create undefined\n", SanitizerToolName);
+  if (!_pthread_create) {
+VPrintf(1, "%s: internal_pthread_create undefined\n", SanitizerToolName);
 return;  // Can't spawn the thread anyway.
   }
 
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 5d2dd3a7a658f..b590a9e7c3fc6 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -1845,18 +1845,18 @@ HandleSignalMode GetHandleSignalMode(int signum) {
 
 #  if !SANITIZER_GO
 void *internal_start_thread(void *(*func)(void *arg), void *arg) {
-  if (_pthread_create == 0)
+  if (_pthread_create == 0)
 return nullptr;
   // Start the thread with signals blocked, otherwise it can steal user 
signals.
   ScopedBlockSignals block(nullptr);
   void *th;
-  

[llvm-branch-commits] [sanitizer] Rename DEFINE_REAL_PTHREAD_FUNCTIONS (PR #96527)

2024-06-24 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka created 
https://github.com/llvm/llvm-project/pull/96527

We use REAL() calls in interceptors, but
DEFINE_REAL_PTHREAD_FUNCTIONS has nothing to do
with them and is only used for internal maintenance
threads.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang][misexpect] Add support to clang for profitable annotation diagnostics (PR #96525)

2024-06-24 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-clang-driver

Author: Paul Kirth (ilovepi)


Changes

Add basic plumbing to clang so that diagnostics can be surfaced to
users.


---
Full diff: https://github.com/llvm/llvm-project/pull/96525.diff


9 Files Affected:

- (modified) clang/include/clang/Basic/CodeGenOptions.def (+1) 
- (modified) clang/include/clang/Driver/Options.td (+4) 
- (modified) clang/lib/CodeGen/BackendUtil.cpp (+1) 
- (modified) clang/lib/CodeGen/CodeGenAction.cpp (+4) 
- (modified) clang/lib/Driver/ToolChains/Clang.cpp (+7) 
- (modified) clang/lib/Frontend/CompilerInvocation.cpp (+3) 
- (added) clang/test/Profile/Inputs/missing-annotation.proftext (+18) 
- (added) clang/test/Profile/missing-annotation.c (+35) 
- (modified) llvm/lib/Transforms/Utils/MisExpect.cpp (+4-4) 


``diff
diff --git a/clang/include/clang/Basic/CodeGenOptions.def 
b/clang/include/clang/Basic/CodeGenOptions.def
index e3f6da4a84f69..fab91cd8a76b5 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -181,6 +181,7 @@ CODEGENOPT(FatalWarnings , 1, 0) ///< Set when 
-Wa,--fatal-warnings is
 CODEGENOPT(NoWarn, 1, 0) ///< Set when -Wa,--no-warn is enabled.
 CODEGENOPT(NoTypeCheck   , 1, 0) ///< Set when -Wa,--no-type-check is 
enabled.
 CODEGENOPT(MisExpect , 1, 0) ///< Set when -Wmisexpect is enabled
+CODEGENOPT(MissingAnnotations, 1, 0) ///< Set when suggesting missing perf 
annotations
 CODEGENOPT(EnableSegmentedStacks , 1, 0) ///< Set when -fsplit-stack is 
enabled.
 CODEGENOPT(StackClashProtector, 1, 0) ///< Set when -fstack-clash-protection 
is enabled.
 CODEGENOPT(NoImplicitFloat   , 1, 0) ///< Set when -mno-implicit-float is 
enabled.
diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index c529cc9506667..6dfc5bb437034 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2079,6 +2079,10 @@ def fdiagnostics_misexpect_tolerance_EQ : Joined<["-"], 
"fdiagnostics-misexpect-
 Group, Visibility<[ClangOption, CC1Option]>,
 MetaVarName<"">,
 HelpText<"Prevent misexpect diagnostics from being output if the profile 
counts are within N% of the expected. ">;
+defm diagnostics_missing_annotations : 
BoolFOption<"diagnostics-missing-annotations",
+CodeGenOpts<"MissingAnnotations">, DefaultFalse,
+PosFlag,
+NegFlag>;
 defm diagnostics_show_option : BoolFOption<"diagnostics-show-option",
 DiagnosticOpts<"ShowOptionNames">, DefaultTrue,
 NegFlag,
diff --git a/clang/lib/CodeGen/BackendUtil.cpp 
b/clang/lib/CodeGen/BackendUtil.cpp
index b09680086248d..2bcc23a6a9655 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -486,6 +486,7 @@ static bool initTargetOptions(DiagnosticsEngine ,
   Options.MCOptions.PPCUseFullRegisterNames =
   CodeGenOpts.PPCUseFullRegisterNames;
   Options.MisExpect = CodeGenOpts.MisExpect;
+  Options.MissingAnnotations = CodeGenOpts.MissingAnnotations;
 
   return true;
 }
diff --git a/clang/lib/CodeGen/CodeGenAction.cpp 
b/clang/lib/CodeGen/CodeGenAction.cpp
index 6d3efdb5ffe34..efebe28e5ebc4 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -326,6 +326,10 @@ void BackendConsumer::HandleTranslationUnit(ASTContext ) 
{
   CodeGenOpts.DiagnosticsMisExpectTolerance);
   }
 
+  if (CodeGenOpts.MissingAnnotations) {
+Ctx.setAnnotationDiagsRequested(true);
+  }
+
   // Link each LinkModule into our module.
   if (!CodeGenOpts.LinkBitcodePostopt && LinkInModules(getModule()))
 return;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index 2ce9e2f4bcfcd..9704779e00258 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4243,6 +4243,13 @@ static void RenderDiagnosticsOptions(const Driver , 
const ArgList ,
 CmdArgs.push_back(Args.MakeArgString(Opt));
   }
 
+  if (const Arg *A =
+  Args.getLastArg(options::OPT_fdiagnostics_missing_annotations,
+  options::OPT_fno_diagnostics_missing_annotations)) {
+if (A->getOption().matches(options::OPT_fdiagnostics_missing_annotations))
+  CmdArgs.push_back("-fdiagnostics-missing-annotations");
+  }
+
   if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_format_EQ)) {
 CmdArgs.push_back("-fdiagnostics-format");
 CmdArgs.push_back(A->getValue());
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp 
b/clang/lib/Frontend/CompilerInvocation.cpp
index a6d9f42ace9cc..f9d34cd011f2a 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4775,6 +4775,9 @@ bool CompilerInvocation::CreateFromArgsImpl(
 }
   }
 
+  if(Args.hasArg(OPT_fdiagnostics_missing_annotations))
+Res.getCodeGenOpts().MissingAnnotations = true;
+
   if (LangOpts.CUDA) {
  

[llvm-branch-commits] [misexpect] Support diagnostics from frontend profile data (PR #96524)

2024-06-24 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-pgo

@llvm/pr-subscribers-llvm-transforms

Author: Paul Kirth (ilovepi)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/96524.diff


3 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp (+18) 
- (modified) llvm/lib/Transforms/Utils/MisExpect.cpp (+1-2) 
- (modified) llvm/test/Transforms/PGOProfile/missing-annotation.ll (+1-1) 


``diff
diff --git a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp 
b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 17c5a4ee1fd0b..4075749d0d574 100644
--- a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -369,9 +369,21 @@ static bool lowerExpectIntrinsic(Function ) {
 if (BranchInst *BI = dyn_cast(BB.getTerminator())) {
   if (handleBranchExpect(*BI))
 ExpectIntrinsicsHandled++;
+  else {
+SmallVector Weights;
+if (extractBranchWeights(*BI, Weights))
+  misexpect::checkMissingAnnotations(*BI, Weights,
+ /*IsFrontendInstr=*/false);
+  }
 } else if (SwitchInst *SI = dyn_cast(BB.getTerminator())) {
   if (handleSwitchExpect(*SI))
 ExpectIntrinsicsHandled++;
+  else {
+SmallVector Weights;
+if (extractBranchWeights(*SI, Weights))
+  misexpect::checkMissingAnnotations(*SI, Weights,
+ /*isFrontend=*/false);
+  }
 }
 
 // Remove llvm.expect intrinsics. Iterate backwards in order
@@ -383,6 +395,12 @@ static bool lowerExpectIntrinsic(Function ) {
 if (SelectInst *SI = dyn_cast()) {
   if (handleBrSelExpect(*SI))
 ExpectIntrinsicsHandled++;
+  else {
+SmallVector Weights;
+if (extractBranchWeights(*SI, Weights))
+  misexpect::checkMissingAnnotations(*SI, Weights,
+ /*isFrontend=*/false);
+  }
 }
 continue;
   }
diff --git a/llvm/lib/Transforms/Utils/MisExpect.cpp 
b/llvm/lib/Transforms/Utils/MisExpect.cpp
index 933d9a146533d..1d88f867971e8 100644
--- a/llvm/lib/Transforms/Utils/MisExpect.cpp
+++ b/llvm/lib/Transforms/Utils/MisExpect.cpp
@@ -302,8 +302,7 @@ void checkMissingAnnotations(Instruction ,
 return;
 
   if (IsFrontendInstr) {
-// TODO: Frontend checking will have to be thought through, since we need
-// to do the check on branches that don't have expect intrinsics
+verifyMissingAnnotations(I, ExistingWeights);
   } else {
 SmallVector ExpectedWeights;
 if (extractBranchWeights(I, ExpectedWeights))
diff --git a/llvm/test/Transforms/PGOProfile/missing-annotation.ll 
b/llvm/test/Transforms/PGOProfile/missing-annotation.ll
index 6b52302449900..03b0b3bb5cc54 100644
--- a/llvm/test/Transforms/PGOProfile/missing-annotation.ll
+++ b/llvm/test/Transforms/PGOProfile/missing-annotation.ll
@@ -3,7 +3,7 @@
 
 ; RUN: llvm-profdata merge %S/Inputs/misexpect-branch-correct.proftext -o 
%t.profdata
 
-; RUN: opt < %s -passes="function(lower-expect),pgo-instr-use" 
-pgo-test-profile-file=%t.profdata -pgo-missing-annotations 
-pass-remarks=missing-annotation -S 2>&1 | FileCheck %s 
--check-prefix=MISSING_ANNOTATION
+; RUN: opt < %s -passes="function(lower-expect),pgo-instr-use" 
-pgo-test-profile-file=%t.profdata -pgo-missing-annotations 
-pass-remarks=missing-annotations -S 2>&1 | FileCheck %s 
--check-prefix=MISSING_ANNOTATION
 
 ; MISSING_ANNOTATION: remark: misexpect-branch.c:22:0:  Extremely hot 
condition. Consider adding llvm.expect intrinsic
 

``




https://github.com/llvm/llvm-project/pull/96524
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang][misexpect] Add support to clang for profitable annotation diagnostics (PR #96525)

2024-06-24 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi created 
https://github.com/llvm/llvm-project/pull/96525

Add basic plumbing to clang so that diagnostics can be surfaced to
users.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm][misexpect] Enable diagnostics for profitable llvm.expect annotations (PR #96523)

2024-06-24 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-pgo

Author: Paul Kirth (ilovepi)


Changes

Issue #56502 describes an enhancement related to the use of llvm.expect.
The request is for a diagnostic mode that can identify branches that
would benefit from the use of llvm.expect based on the branch_weights
assigned from a PGO or sample profile.

To support identifying branches (or switches) that would benefit from the
use of an llvm.expect intrinsic, we follow a similar checking pattern to
that used in MisExpect, but only in cases where MisExpect diagnostics
would not be used (i.e., when an llvm.expect intrinsic has already been
used).


---
Full diff: https://github.com/llvm/llvm-project/pull/96523.diff


8 Files Affected:

- (modified) llvm/include/llvm/IR/LLVMContext.h (+2) 
- (modified) llvm/include/llvm/Target/TargetOptions.h (+4) 
- (modified) llvm/include/llvm/Transforms/Utils/MisExpect.h (+3) 
- (modified) llvm/lib/IR/LLVMContext.cpp (+6) 
- (modified) llvm/lib/IR/LLVMContextImpl.h (+4) 
- (modified) llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp (+1) 
- (modified) llvm/lib/Transforms/Utils/MisExpect.cpp (+137-36) 
- (added) llvm/test/Transforms/PGOProfile/missing-annotation.ll (+110) 


``diff
diff --git a/llvm/include/llvm/IR/LLVMContext.h 
b/llvm/include/llvm/IR/LLVMContext.h
index 89ad6f1572c67..94e526d465c8e 100644
--- a/llvm/include/llvm/IR/LLVMContext.h
+++ b/llvm/include/llvm/IR/LLVMContext.h
@@ -212,6 +212,8 @@ class LLVMContext {
   void setMisExpectWarningRequested(bool Requested);
   void setDiagnosticsMisExpectTolerance(std::optional Tolerance);
   uint32_t getDiagnosticsMisExpectTolerance() const;
+  bool getAnnotationDiagsRequested() const;
+  void setAnnotationDiagsRequested(bool Requested);
 
   /// Return the minimum hotness value a diagnostic would need in order
   /// to be included in optimization diagnostics.
diff --git a/llvm/include/llvm/Target/TargetOptions.h 
b/llvm/include/llvm/Target/TargetOptions.h
index d3464b5202ff3..447b86f69cfaa 100644
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -377,6 +377,10 @@ namespace llvm {
 /// By default, it is set to false
 unsigned MisExpect : 1;
 
+/// When set to true, enable MissingAnnotations diagnostics
+/// By default, it is set to false
+unsigned MissingAnnotations : 1;
+
 /// When set to true, const objects with relocatable address values are put
 /// into the RO data section.
 unsigned XCOFFReadOnlyPointers : 1;
diff --git a/llvm/include/llvm/Transforms/Utils/MisExpect.h 
b/llvm/include/llvm/Transforms/Utils/MisExpect.h
index e9fba47c97a4d..118d859f93dd6 100644
--- a/llvm/include/llvm/Transforms/Utils/MisExpect.h
+++ b/llvm/include/llvm/Transforms/Utils/MisExpect.h
@@ -76,6 +76,9 @@ void checkExpectAnnotations(Instruction ,
 const ArrayRef ExistingWeights,
 bool IsFrontend);
 
+void checkMissingAnnotations(Instruction ,
+ const ArrayRef ExistingWeights,
+ bool IsFrontendInstr);
 } // namespace misexpect
 } // namespace llvm
 
diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp
index 8120cccace40b..e1a05c1347969 100644
--- a/llvm/lib/IR/LLVMContext.cpp
+++ b/llvm/lib/IR/LLVMContext.cpp
@@ -171,6 +171,12 @@ void LLVMContext::setDiagnosticsMisExpectTolerance(
 uint32_t LLVMContext::getDiagnosticsMisExpectTolerance() const {
   return pImpl->DiagnosticsMisExpectTolerance.value_or(0);
 }
+void LLVMContext::setAnnotationDiagsRequested(bool Requested) {
+  pImpl->AnnotationsDiagsRequested = Requested;
+}
+bool LLVMContext::getAnnotationDiagsRequested() const {
+  return pImpl->AnnotationsDiagsRequested;
+}
 
 bool LLVMContext::isDiagnosticsHotnessThresholdSetFromPSI() const {
   return !pImpl->DiagnosticsHotnessThreshold.has_value();
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index 5f8df87149f04..59eb5eb29705d 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -1495,6 +1495,10 @@ class LLVMContextImpl {
   std::optional DiagnosticsMisExpectTolerance = 0;
   bool MisExpectWarningRequested = false;
 
+  /// Enables Diagnostics for Missing llvm.expect annotations on extremely hot
+  /// branches
+  bool AnnotationsDiagsRequested = false;
+
   /// The specialized remark streamer used by LLVM's OptimizationRemarkEmitter.
   std::unique_ptr LLVMRS;
 
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp 
b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 572d37a2b3e55..0fbf60194696a 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -2261,6 +2261,7 @@ void llvm::setProfMetadata(Module *M, Instruction *TI,
   } dbgs() << "\n";);
 
   misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
+  

[llvm-branch-commits] [misexpect] Support diagnostics from frontend profile data (PR #96524)

2024-06-24 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi created 
https://github.com/llvm/llvm-project/pull/96524

None


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm][misexpect] Enable diagnostics for profitable llvm.expect annotations (PR #96523)

2024-06-24 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi created 
https://github.com/llvm/llvm-project/pull/96523

Issue #56502 describes an enhancement related to the use of llvm.expect.
The request is for a diagnostic mode that can identify branches that
would benefit from the use of llvm.expect based on the branch_weights
assigned from a PGO or sample profile.

To support identifying branches (or switches) that would benefit from the
use of an llvm.expect intrinsic, we follow a similar checking pattern to
that used in MisExpect, but only in cases where MisExpect diagnostics
would not be used (i.e., when an llvm.expect intrinsic has already been
used).



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)

2024-06-24 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> need some tests

This does nothing as it is. The real use patches have thousands of tests 

https://github.com/llvm/llvm-project/pull/96442
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for global atomic fadd denormal support (PR #96443)

2024-06-24 Thread Stanislav Mekhanoshin via llvm-branch-commits

rampitec wrote:

It is worse than that. It behaves differently depending on where the atomic is 
executed. There is no single answer as to whether this instruction supports denorms.

https://github.com/llvm/llvm-project/pull/96443
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)

2024-06-24 Thread Stanislav Mekhanoshin via llvm-branch-commits

rampitec wrote:

Use it in a predicate when defining pseudos?

https://github.com/llvm/llvm-project/pull/96444
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)

2024-06-24 Thread Yaxun Liu via llvm-branch-commits

yxsamliu wrote:

need some tests

https://github.com/llvm/llvm-project/pull/96442
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)

2024-06-24 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

No, the string description mentions this is for the non-PCIe supported cases 

https://github.com/llvm/llvm-project/pull/96442
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm][ProfDataUtils] provide getNumBranchWeights API (PR #90146)

2024-06-24 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/90146


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm][ProfDataUtils] provide getNumBranchWeights API (PR #90146)

2024-06-24 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/90146


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm][ProfDataUtils] provide getNumBranchWeights API (PR #90146)

2024-06-24 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/90146


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm][ProfDataUtils] provide getNumBranchWeights API (PR #90146)

2024-06-24 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/90146


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm][misexpect] Update MisExpect to use provenance tracking metadata (PR #86610)

2024-06-24 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/86610


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm][misexpect] Update MisExpect to use provenance tracking metadata (PR #86610)

2024-06-24 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/86610


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)

2024-06-24 Thread Yaxun Liu via llvm-branch-commits

yxsamliu wrote:

If a subtarget does not have this feature, do none of the atomic 
instructions work for fine-grained remote memory, including integer atomic 
add/xchg/cmpxchg?

https://github.com/llvm/llvm-project/pull/96442
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [TySan] Improved compatability for tests (PR #96507)

2024-06-24 Thread via llvm-branch-commits

https://github.com/gbMattN edited 
https://github.com/llvm/llvm-project/pull/96507
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [TySan] Improved compatability for tests (PR #96507)

2024-06-24 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: None (gbMattN)


Changes

All the violation tests failed when running on my machine. By changing some 
check lines to regular expressions, we can account for file locality and 
hardware-specific type differences. There was also an include needed for better 
integer types.

---
Full diff: https://github.com/llvm/llvm-project/pull/96507.diff


6 Files Affected:

- (modified) compiler-rt/test/tysan/violation-pr45282.c (+1-1) 
- (modified) compiler-rt/test/tysan/violation-pr47137.c (+3-2) 
- (modified) compiler-rt/test/tysan/violation-pr62544.c (+1-1) 
- (modified) compiler-rt/test/tysan/violation-pr62828.cpp (+1-1) 
- (modified) compiler-rt/test/tysan/violation-pr68655.cpp (+1-1) 
- (modified) compiler-rt/test/tysan/violation-pr86685.c (+1-1) 


``diff
diff --git a/compiler-rt/test/tysan/violation-pr45282.c 
b/compiler-rt/test/tysan/violation-pr45282.c
index 2cbc37b3d1832..37e710ae18deb 100644
--- a/compiler-rt/test/tysan/violation-pr45282.c
+++ b/compiler-rt/test/tysan/violation-pr45282.c
@@ -18,7 +18,7 @@ int main(void) {
 
 // CHECK:  TypeSanitizer: type-aliasing-violation on address
 // CHECK-NEXT: WRITE of size 8 at {{.+}} with type double accesses an existing 
object of type float
-// CHECK-NEXT:   in main violation-pr45282.c:25
+// CHECK-NEXT:   in main {{.*violation-pr45282.c:25.*}}
 
   // loop of problems
   for (j = 2; j <= 4; ++j) {
diff --git a/compiler-rt/test/tysan/violation-pr47137.c 
b/compiler-rt/test/tysan/violation-pr47137.c
index 04d68d1dd936e..7b996d39f81e2 100644
--- a/compiler-rt/test/tysan/violation-pr47137.c
+++ b/compiler-rt/test/tysan/violation-pr47137.c
@@ -4,6 +4,7 @@
 // https://github.com/llvm/llvm-project/issues/47137
 #include 
 #include 
+#include 
 
 void f(int m) {
   int n = (4 * m + 2) / 3;
@@ -23,8 +24,8 @@ void f(int m) {
   }
 
 // CHECK:  TypeSanitizer: type-aliasing-violation on address
-// CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing 
object of type long long
-// CHECK-NEXT:in f violation-pr47137.c:30
+// CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing 
object of type {{(long)+}}
+// CHECK-NEXT:in f {{.*violation-pr47137.c:31.*}}
   for (int i = 0, j = 0; j < 4 * m; i += 4, j += 3) {
 for (int k = 0; k < 3; k++) {
   ((uint16_t *)a)[j + k] = ((uint16_t *)a)[i + k];
diff --git a/compiler-rt/test/tysan/violation-pr62544.c 
b/compiler-rt/test/tysan/violation-pr62544.c
index 4187a91bde3fc..7c6cb59156d15 100644
--- a/compiler-rt/test/tysan/violation-pr62544.c
+++ b/compiler-rt/test/tysan/violation-pr62544.c
@@ -18,7 +18,7 @@ int main() {
 
 // CHECK:  TypeSanitizer: type-aliasing-violation on address
 // CHECK-NEXT: WRITE of size 2 at {{.+}} with type short accesses an existing 
object of type int
-// CHECK-NEXT:   in main violation-pr62544.c:22
+// CHECK-NEXT:   in main {{.*violation-pr62544.c:22.*}}
   *e = 3;
   printf("%d\n", a);
 }
diff --git a/compiler-rt/test/tysan/violation-pr62828.cpp 
b/compiler-rt/test/tysan/violation-pr62828.cpp
index 879200c8069b0..f815227d86248 100644
--- a/compiler-rt/test/tysan/violation-pr62828.cpp
+++ b/compiler-rt/test/tysan/violation-pr62828.cpp
@@ -24,7 +24,7 @@ short *test1(int_v8 *cast_c_array, short_v8 *shuf_c_array1, 
int *ptr) {
 
 // CHECK:  ERROR: TypeSanitizer: type-aliasing-violation on address
 // CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing 
object of type int
-// CHECK-NEXT:in test1(int (*) [8], short (*) [8], int*) 
violation-pr62828.cpp:29
+// CHECK-NEXT:in test1(int (*) [8], short (*) [8], int*) 
{{.*violation-pr62828.cpp:29.*}}
   for (int i3 = 0; i3 < 4; ++i3) {
 output2[i3] = input2[(i3 * 2)];
   }
diff --git a/compiler-rt/test/tysan/violation-pr68655.cpp 
b/compiler-rt/test/tysan/violation-pr68655.cpp
index ac20f8c94e1ff..615971c75d20e 100644
--- a/compiler-rt/test/tysan/violation-pr68655.cpp
+++ b/compiler-rt/test/tysan/violation-pr68655.cpp
@@ -9,7 +9,7 @@ struct S1 {
 
 // CHECK: TypeSanitizer: type-aliasing-violation on address
 // CHECK-NEXT:  READ of size 4 at {{.+}} with type int accesses an existing 
object of type long long (in S1 at offset 0)
-// CHECK-NEXT: in copyMem(S1*, S1*) violation-pr68655.cpp:19
+// CHECK-NEXT: in copyMem(S1*, S1*) {{.*violation-pr68655.cpp:19.*}}
 
 void inline copyMem(S1 *dst, S1 *src) {
   unsigned *d = reinterpret_cast(dst);
diff --git a/compiler-rt/test/tysan/violation-pr86685.c 
b/compiler-rt/test/tysan/violation-pr86685.c
index b5198c440fa44..6667fc1805195 100644
--- a/compiler-rt/test/tysan/violation-pr86685.c
+++ b/compiler-rt/test/tysan/violation-pr86685.c
@@ -13,7 +13,7 @@ void foo(int *s, float *f, long n) {
 
 // CHECK:  TypeSanitizer: type-aliasing-violation on address
 // CHECK-NEXT: WRITE of size 4 at {{.+}} with type int accesses an existing 
object of type float
-// CHECK-NEXT:   #0 {{.+}} in foo violation-pr86685.c:17
+// CHECK-NEXT:   #0 {{.+}} in 

[llvm-branch-commits] [compiler-rt] [TySan] Improved compatability for tests (PR #96507)

2024-06-24 Thread via llvm-branch-commits

gbMattN wrote:

@fhahn (Sorry for the ping, I can't manually add reviewers yet)

https://github.com/llvm/llvm-project/pull/96507
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [TySan] Improved compatability for tests (PR #96507)

2024-06-24 Thread via llvm-branch-commits

github-actions[bot] wrote:



Thank you for submitting a Pull Request (PR) to the LLVM Project!

This PR will be automatically labeled and the relevant teams will be
notified.

If you wish to, you can add reviewers by using the "Reviewers" section on this 
page.

If this is not working for you, it is probably because you do not have write
permissions for the repository. In which case you can instead tag reviewers by
name in a comment by using `@` followed by their GitHub username.

If you have received no comments on your PR for a week, you can request a review
by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate
is once a week. Please remember that you are asking for valuable time from 
other developers.

If you have further questions, they may be answered by the [LLVM GitHub User 
Guide](https://llvm.org/docs/GitHub.html).

You can also ask questions in a comment on this PR, on the [LLVM 
Discord](https://discord.com/invite/xS7Z362) or on the 
[forums](https://discourse.llvm.org/).

https://github.com/llvm/llvm-project/pull/96507
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [TySan] Improved compatability for tests (PR #96507)

2024-06-24 Thread via llvm-branch-commits

https://github.com/gbMattN created 
https://github.com/llvm/llvm-project/pull/96507

All the violation tests failed when running on my machine. By changing some 
check lines to regular expressions, we can account for file locality and 
hardware-specific type differences. There was also an include needed for better 
integer types.

>From 712aa7fd384a773dad87a8a2f7a32b6a593c3b35 Mon Sep 17 00:00:00 2001
From: Matthew Nagy 
Date: Mon, 24 Jun 2024 15:07:01 +
Subject: [PATCH] [TySan] Improved compatability for tests

---
 compiler-rt/test/tysan/violation-pr45282.c   | 2 +-
 compiler-rt/test/tysan/violation-pr47137.c   | 5 +++--
 compiler-rt/test/tysan/violation-pr62544.c   | 2 +-
 compiler-rt/test/tysan/violation-pr62828.cpp | 2 +-
 compiler-rt/test/tysan/violation-pr68655.cpp | 2 +-
 compiler-rt/test/tysan/violation-pr86685.c   | 2 +-
 6 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/compiler-rt/test/tysan/violation-pr45282.c 
b/compiler-rt/test/tysan/violation-pr45282.c
index 2cbc37b3d1832..37e710ae18deb 100644
--- a/compiler-rt/test/tysan/violation-pr45282.c
+++ b/compiler-rt/test/tysan/violation-pr45282.c
@@ -18,7 +18,7 @@ int main(void) {
 
 // CHECK:  TypeSanitizer: type-aliasing-violation on address
 // CHECK-NEXT: WRITE of size 8 at {{.+}} with type double accesses an existing 
object of type float
-// CHECK-NEXT:   in main violation-pr45282.c:25
+// CHECK-NEXT:   in main {{.*violation-pr45282.c:25.*}}
 
   // loop of problems
   for (j = 2; j <= 4; ++j) {
diff --git a/compiler-rt/test/tysan/violation-pr47137.c 
b/compiler-rt/test/tysan/violation-pr47137.c
index 04d68d1dd936e..7b996d39f81e2 100644
--- a/compiler-rt/test/tysan/violation-pr47137.c
+++ b/compiler-rt/test/tysan/violation-pr47137.c
@@ -4,6 +4,7 @@
 // https://github.com/llvm/llvm-project/issues/47137
 #include 
 #include 
+#include 
 
 void f(int m) {
   int n = (4 * m + 2) / 3;
@@ -23,8 +24,8 @@ void f(int m) {
   }
 
 // CHECK:  TypeSanitizer: type-aliasing-violation on address
-// CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing 
object of type long long
-// CHECK-NEXT:in f violation-pr47137.c:30
+// CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing 
object of type {{(long)+}}
+// CHECK-NEXT:in f {{.*violation-pr47137.c:31.*}}
   for (int i = 0, j = 0; j < 4 * m; i += 4, j += 3) {
 for (int k = 0; k < 3; k++) {
   ((uint16_t *)a)[j + k] = ((uint16_t *)a)[i + k];
diff --git a/compiler-rt/test/tysan/violation-pr62544.c 
b/compiler-rt/test/tysan/violation-pr62544.c
index 4187a91bde3fc..7c6cb59156d15 100644
--- a/compiler-rt/test/tysan/violation-pr62544.c
+++ b/compiler-rt/test/tysan/violation-pr62544.c
@@ -18,7 +18,7 @@ int main() {
 
 // CHECK:  TypeSanitizer: type-aliasing-violation on address
 // CHECK-NEXT: WRITE of size 2 at {{.+}} with type short accesses an existing 
object of type int
-// CHECK-NEXT:   in main violation-pr62544.c:22
+// CHECK-NEXT:   in main {{.*violation-pr62544.c:22.*}}
   *e = 3;
   printf("%d\n", a);
 }
diff --git a/compiler-rt/test/tysan/violation-pr62828.cpp 
b/compiler-rt/test/tysan/violation-pr62828.cpp
index 879200c8069b0..f815227d86248 100644
--- a/compiler-rt/test/tysan/violation-pr62828.cpp
+++ b/compiler-rt/test/tysan/violation-pr62828.cpp
@@ -24,7 +24,7 @@ short *test1(int_v8 *cast_c_array, short_v8 *shuf_c_array1, 
int *ptr) {
 
 // CHECK:  ERROR: TypeSanitizer: type-aliasing-violation on address
 // CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing 
object of type int
-// CHECK-NEXT:in test1(int (*) [8], short (*) [8], int*) 
violation-pr62828.cpp:29
+// CHECK-NEXT:in test1(int (*) [8], short (*) [8], int*) 
{{.*violation-pr62828.cpp:29.*}}
   for (int i3 = 0; i3 < 4; ++i3) {
 output2[i3] = input2[(i3 * 2)];
   }
diff --git a/compiler-rt/test/tysan/violation-pr68655.cpp 
b/compiler-rt/test/tysan/violation-pr68655.cpp
index ac20f8c94e1ff..615971c75d20e 100644
--- a/compiler-rt/test/tysan/violation-pr68655.cpp
+++ b/compiler-rt/test/tysan/violation-pr68655.cpp
@@ -9,7 +9,7 @@ struct S1 {
 
 // CHECK: TypeSanitizer: type-aliasing-violation on address
 // CHECK-NEXT:  READ of size 4 at {{.+}} with type int accesses an existing 
object of type long long (in S1 at offset 0)
-// CHECK-NEXT: in copyMem(S1*, S1*) violation-pr68655.cpp:19
+// CHECK-NEXT: in copyMem(S1*, S1*) {{.*violation-pr68655.cpp:19.*}}
 
 void inline copyMem(S1 *dst, S1 *src) {
   unsigned *d = reinterpret_cast(dst);
diff --git a/compiler-rt/test/tysan/violation-pr86685.c 
b/compiler-rt/test/tysan/violation-pr86685.c
index b5198c440fa44..6667fc1805195 100644
--- a/compiler-rt/test/tysan/violation-pr86685.c
+++ b/compiler-rt/test/tysan/violation-pr86685.c
@@ -13,7 +13,7 @@ void foo(int *s, float *f, long n) {
 
 // CHECK:  TypeSanitizer: type-aliasing-violation on address
 // CHECK-NEXT: WRITE of size 4 at {{.+}} with type int accesses an existing 
object of type float
-// CHECK-NEXT:   #0 {{.+}} in 

[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)

2024-06-24 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/96444

>From 80c3f71f03d3b2ccbcd418d76d417f2a243fdbe4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sun, 23 Jun 2024 17:07:53 +0200
Subject: [PATCH 1/2] AMDGPU: Add subtarget feature for memory atomic fadd f64

---
 llvm/lib/Target/AMDGPU/AMDGPU.td  | 10 +-
 llvm/lib/Target/AMDGPU/GCNSubtarget.h |  7 +++
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  2 +-
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 0ec65f759bc35..028c54d8d94d2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst
   "Has flat_atomic_add_f32 instruction"
 >;
 
+def FeatureFlatBufferGlobalAtomicFaddF64Inst
+  : SubtargetFeature<"flat-buffer-global-fadd-f64-inst",
+  "HasFlatBufferGlobalAtomicFaddF64Inst",
+  "true",
+  "Has flat, buffer, and global instructions for f64 atomic fadd"
+>;
+
 def FeatureMemoryAtomicFaddF32DenormalSupport
   : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support",
   "HasAtomicMemoryAtomicFaddF32DenormalSupport",
@@ -1388,7 +1395,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
  FeatureBackOffBarrier,
  FeatureKernargPreload,
  FeatureAtomicFMinFMaxF64GlobalInsts,
- FeatureAtomicFMinFMaxF64FlatInsts
+ FeatureAtomicFMinFMaxF64FlatInsts,
+ FeatureFlatBufferGlobalAtomicFaddF64Inst
  ])>;
 
 def FeatureISAVersion9_0_C : FeatureSet<
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h 
b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 674d84422538f..922435c5efaa6 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool HasAtomicGlobalPkAddBF16Inst = false;
   bool HasAtomicBufferPkAddBF16Inst = false;
   bool HasFlatAtomicFaddF32Inst = false;
+  bool HasFlatBufferGlobalAtomicFaddF64Inst = false;
   bool HasDefaultComponentZero = false;
   bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;
   bool HasDefaultComponentBroadcast = false;
@@ -873,6 +874,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
 
   bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
 
+  /// \return true if the target has flat, global, and buffer atomic fadd for
+  /// double.
+  bool hasFlatBufferGlobalAtomicFaddF64Inst() const {
+return HasFlatBufferGlobalAtomicFaddF64Inst;
+  }
+
   /// \return true if the target's flat, global, and buffer atomic fadd for
   /// float supports denormal handling.
   bool hasMemoryAtomicFaddF32DenormalSupport() const {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index eec750e5b8251..6b5ba160d6402 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16028,7 +16028,7 @@ 
SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
   return AtomicExpansionKind::CmpXChg;
 
 // global and flat atomic fadd f64: gfx90a, gfx940.
-if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy())
+if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy())
   return ReportUnsafeHWInst(AtomicExpansionKind::None);
 
 if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {

>From c1354032fc55234ffddf9136f17f5ee400c01c16 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Mon, 24 Jun 2024 15:42:17 +0200
Subject: [PATCH 2/2] Add to gfx940

---
 llvm/lib/Target/AMDGPU/AMDGPU.td | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 028c54d8d94d2..3ed68a259ca15 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1441,7 +1441,8 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF64FlatInsts,
FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
-   FeatureMemoryAtomicFaddF32DenormalSupport
+   FeatureMemoryAtomicFaddF32DenormalSupport,
+   FeatureFlatBufferGlobalAtomicFaddF64Inst
]>;
 
 def FeatureISAVersion9_4_0 : FeatureSet<

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits


@@ -2638,4 +2638,41 @@ def WinogradConv2DOp : Op {
+  let description = [{
+Decompose winograd operators. It will convert filter, input and output
+transform operators into a combination of scf, tensor, and linalg

ftynse wrote:

Nit: operations

https://github.com/llvm/llvm-project/pull/96184
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits


@@ -2760,6 +2760,89 @@ LogicalResult WinogradFilterTransformOp::verify() {
   return success();
 }
 
+SmallVector
+WinogradFilterTransformOp::getIterationDomain(OpBuilder ) {
+  Location loc = getLoc();
+  Value zero = builder.create(loc, 0);
+  Value one = builder.create(loc, 1);
+  Value output = getOutput();
+  SmallVector loopBounds(6);
+  for (unsigned dim = 0; dim < 6; ++dim) {
+loopBounds[dim].offset = zero;
+loopBounds[dim].size = getDimValue(builder, loc, output, dim);
+loopBounds[dim].stride = one;
+  }
+  return loopBounds;
+}
+
+SmallVector
+WinogradFilterTransformOp::getLoopIteratorTypes() {
+  SmallVector iteratorTypes(6,
+ 
utils::IteratorType::parallel);
+  return iteratorTypes;
+}
+
+Value getValueFromOpFoldResult(OpFoldResult opFoldResult, OpBuilder ,
+   Location loc) {
+  if (auto val = opFoldResult.dyn_cast()) {
+return val;
+  } else if (auto attr = opFoldResult.dyn_cast()) {
+auto intAttr = cast(attr);
+return builder.create(loc, intAttr);
+  }
+  // This should never happen if OpFoldResult is correctly formed.

ftynse wrote:

Then this should be an assertion.

https://github.com/llvm/llvm-project/pull/96184
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits


@@ -2760,6 +2760,89 @@ LogicalResult WinogradFilterTransformOp::verify() {
   return success();
 }
 
+SmallVector
+WinogradFilterTransformOp::getIterationDomain(OpBuilder ) {
+  Location loc = getLoc();
+  Value zero = builder.create(loc, 0);
+  Value one = builder.create(loc, 1);

ftynse wrote:

IIRC, `Range` contains a list of `OpFoldResult`, meaning we can put attributes 
there and not materialize operations for these constants.

https://github.com/llvm/llvm-project/pull/96184
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits


@@ -2760,6 +2760,89 @@ LogicalResult WinogradFilterTransformOp::verify() {
   return success();
 }
 
+SmallVector
+WinogradFilterTransformOp::getIterationDomain(OpBuilder ) {
+  Location loc = getLoc();
+  Value zero = builder.create(loc, 0);
+  Value one = builder.create(loc, 1);
+  Value output = getOutput();
+  SmallVector loopBounds(6);
+  for (unsigned dim = 0; dim < 6; ++dim) {
+loopBounds[dim].offset = zero;
+loopBounds[dim].size = getDimValue(builder, loc, output, dim);
+loopBounds[dim].stride = one;
+  }
+  return loopBounds;
+}
+
+SmallVector
+WinogradFilterTransformOp::getLoopIteratorTypes() {
+  SmallVector iteratorTypes(6,
+ 
utils::IteratorType::parallel);
+  return iteratorTypes;
+}
+
+Value getValueFromOpFoldResult(OpFoldResult opFoldResult, OpBuilder ,
+   Location loc) {
+  if (auto val = opFoldResult.dyn_cast()) {
+return val;
+  } else if (auto attr = opFoldResult.dyn_cast()) {
+auto intAttr = cast(attr);
+return builder.create(loc, intAttr);
+  }

ftynse wrote:

I suspect this might already exist somewhere in the arith dialect.

https://github.com/llvm/llvm-project/pull/96184
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits

https://github.com/ftynse commented:

I think @MaheshRavishankar should take a look at the interface implementation 
details.

https://github.com/llvm/llvm-project/pull/96184
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits

https://github.com/ftynse edited https://github.com/llvm/llvm-project/pull/96184
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits


@@ -36,6 +189,92 @@ constexpr TransformMapKeyTy F_2_3{2, 3};
 constexpr TransformMapKeyTy F_4_3{4, 3};
 constexpr TransformMapKeyTy F_2_5{2, 5};
 
+struct TransformMatrix {
+  TransformMatrix(const float *table, int64_t rows, int64_t cols,
+  int64_t scalarFactor = 1)
+  : table(table), rows(rows), cols(cols), scalarFactor(scalarFactor) {}
+
+  const float *table;
+  int64_t rows;
+  int64_t cols;
+  int64_t scalarFactor;
+};
+
+Value create2DTransformMatrix(RewriterBase , Location loc,
+  TransformMatrix transform, Type type) {
+  ArrayRef const_vec(transform.table, transform.rows * transform.cols);

ftynse wrote:

Nit: camelBack
```suggestion
  ArrayRef constVec(transform.table, transform.rows * transform.cols);
```

https://github.com/llvm/llvm-project/pull/96183
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits


@@ -100,6 +594,161 @@ Value matrixMultiply(RewriterBase , Location loc,
   return expandOutput;
 }
 
+// This function transforms the output. The data layout of the output is HWNF.
+// The transformation matrix is 2-dimension. We need to extract H x W from
+// HWNF first. We need to generate 2 levels of loops to iterate on N and F.
+// After the transformation, we get
+//
+// scf.for %n = lo_n to hi_n step 1
+//   scf.for %f = lo_f to hi_f step 1
+// %extracted = extract input from result
+// %ret = linalg.matmul AT, %extracted
+// %ret = linalg.matmul %ret, A
+// %inserted = insert %ret into ret
+//
+Value outputTransform(RewriterBase , Location loc, Value value,
+  Value output, int64_t m, int64_t r,
+  bool leftTransform = true, bool rightTransform = true) {
+  // Map from (m, r) to AT transform matrix.
+  static const llvm::SmallDenseMap
+  ATMatrices = {
+  {F_2_3, TransformMatrix(AT_2x2_3x3, 2, 4)},
+  {F_4_3, TransformMatrix(AT_4x4_3x3, 4, 6, 32)},
+  {F_2_5, TransformMatrix(AT_2x2_5x5, 2, 6, 16)},
+  };
+
+  // Map from (m, r) to A transform matrix.
+  static const llvm::SmallDenseMap
+  AMatrices = {
+  {F_2_3, TransformMatrix(A_2x2_3x3, 4, 2)},
+  {F_4_3, TransformMatrix(A_4x4_3x3, 6, 4, 32)},
+  {F_2_5, TransformMatrix(A_2x2_5x5, 6, 2, 16)},
+  };
+
+  auto valueType = cast(value.getType());
+  Type elementType = valueType.getElementType();
+  auto valueShape = valueType.getShape(); // TileH, TileW, H, W, N, F
+  int64_t valueH = valueShape[2];
+  int64_t valueW = valueShape[3];
+  int64_t valueN = valueShape[4];
+  int64_t valueF = valueShape[5];
+  int64_t alphaH = leftTransform ? m + r - 1 : 1;
+  int64_t alphaW = rightTransform ? m + r - 1 : 1;
+
+  if (valueH != alphaH && valueH != 1)
+return Value();
+  if (valueW != alphaW && valueW != 1)
+return Value();
+
+  auto zeroIdx = rewriter.create(loc, 0);
+  auto nUpperBound = rewriter.create(loc, valueN);
+  auto fUpperBound = rewriter.create(loc, valueF);
+  auto oneStep = rewriter.create(loc, 1);
+
+  auto outerForOp =
+  rewriter.create(loc, zeroIdx, nUpperBound, oneStep, output);
+  Block *outerForBody = outerForOp.getBody();
+  rewriter.setInsertionPointToStart(outerForBody);
+  Value NIter = outerForBody->getArgument(0);
+
+  auto innerForOp = rewriter.create(
+  loc, zeroIdx, fUpperBound, oneStep, outerForOp.getRegionIterArgs()[0]);
+  Block *innerForBody = innerForOp.getBody();
+  rewriter.setInsertionPointToStart(innerForBody);
+  Value FIter = innerForBody->getArgument(0);

ftynse wrote:

FYI, there's a `mlir::scf::buildLoopNest` somewhere that may spare you the 
boilerplate.

https://github.com/llvm/llvm-project/pull/96183
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits


@@ -48,6 +287,261 @@ Value collapse2DData(RewriterBase , Location loc, 
Value data) {
   reassociation);
 }
 
+// This function transforms the filter. The data layout of the filter is FHWC.
+// The transformation matrix is 2-dimension. We need to extract H x W from
+// FHWC first. We need to generate 2 levels of loops to iterate on F and C.
+// After the transformation, we get
+//
+// scf.for %f = lo_f to hi_f step 1
+//   scf.for %c = lo_c to hi_c step 1
+// %extracted = extract filter from filter
+// %ret = linalg.matmul G, %extracted
+// %ret = linalg.matmul %ret, GT
+// %inserted = insert %ret into filter
+//

ftynse wrote:

```suggestion
/// This function transforms the filter. The data layout of the filter is FHWC.
/// The transformation matrix is 2-dimension. We need to extract H x W from
/// FHWC first. We need to generate 2 levels of loops to iterate on F and C.
/// After the transformation, we get
///
/// scf.for %f = lo_f to hi_f step 1
///   scf.for %c = lo_c to hi_c step 1
/// %extracted = extract filter from filter
/// %ret = linalg.matmul G, %extracted
/// %ret = linalg.matmul %ret, GT
/// %inserted = insert %ret into filter
///
```

https://github.com/llvm/llvm-project/pull/96183
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits


@@ -289,6 +938,123 @@ FailureOr winogradConv2DHelper(RewriterBase 
,
   return transformedOutput.getDefiningOp();
 }
 
+FailureOr
+decomposeWinogradFilterTransformHelper(RewriterBase ,
+   linalg::WinogradFilterTransformOp op) {
+  Location loc = op.getLoc();
+  Value filter = op.getFilter();
+  auto filterType = cast(filter.getType());
+  auto filterShape = filterType.getShape();
+  int64_t filterH = filterShape[1];
+  int64_t filterW = filterShape[2];
+
+  // For F(m x 1, r x 1), we only need to do left side transform.
+  bool leftTransform = filterH != 1;
+  // For F(1 x m, 1 x r), we only need to do right side transform.
+  bool rightTransform = filterW != 1;
+  Value transformedFilter =
+  filterTransform(rewriter, loc, filter, op.getOutput(), op.getM(),
+  op.getR(), leftTransform, rightTransform);
+  if (!transformedFilter)
+return failure();
+
+  rewriter.replaceOp(op, transformedFilter);
+
+  return transformedFilter.getDefiningOp();
+}
+
+FailureOr
+decomposeWinogradInputTransformHelper(RewriterBase ,
+  linalg::WinogradInputTransformOp op) {
+  Location loc = op.getLoc();
+  Value input = op.getInput();
+  auto inputType = cast(input.getType());
+  auto inputShape = inputType.getShape();
+  int64_t inputH = inputShape[1];
+  int64_t inputW = inputShape[2];
+
+  // For F(m x 1, r x 1), we only need to do left side transform.
+  bool leftTransform = inputH != 1;
+  // For F(1 x m, 1 x r), we only need to do right side transform.
+  bool rightTransform = inputW != 1;
+  Value transformedInput =
+  inputTransform(rewriter, loc, op.getInput(), op.getOutput(), op.getM(),
+ op.getR(), leftTransform, rightTransform);
+  if (!transformedInput)
+return failure();
+
+  rewriter.replaceOp(op, transformedInput);
+
+  return transformedInput.getDefiningOp();
+}
+
+FailureOr
+decomposeWinogradOutputTransformHelper(RewriterBase ,
+   linalg::WinogradOutputTransformOp op) {
+  Location loc = op.getLoc();
+  Value value = op.getValue();
+  auto valueType = cast(value.getType());
+  auto valueShape = valueType.getShape();
+  int64_t valueH = valueShape[2];
+  int64_t valueW = valueShape[3];
+
+  // For F(m x 1, r x 1), we only need to do left side transform.
+  bool leftTransform = valueH != 1;
+  // For F(1 x m, 1 x r), we only need to do right side transform.
+  bool rightTransform = valueW != 1;
+  Value transformedOutput =
+  outputTransform(rewriter, loc, value, op.getOutput(), op.getM(),
+  op.getR(), leftTransform, rightTransform);
+  if (!transformedOutput)
+return failure();
+
+  rewriter.replaceOp(op, transformedOutput);
+
+  return transformedOutput.getDefiningOp();
+}
+
+class DecomposeWinogradFilterTransform final
+: public OpRewritePattern {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(linalg::WinogradFilterTransformOp op,
+PatternRewriter ) const override {
+if (failed(decomposeWinogradFilterTransformHelper(rewriter, op)))
+  return failure();
+
+return success();
+  }
+};
+
+class DecomposeWinogradInputTransform final
+: public OpRewritePattern {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(linalg::WinogradInputTransformOp op,
+PatternRewriter ) const override {
+if (failed(decomposeWinogradInputTransformHelper(rewriter, op)))
+  return failure();
+
+return success();

ftynse wrote:

```suggestion
return decomposeWinogradInputTransformHelper(rewriter, op);
```

https://github.com/llvm/llvm-project/pull/96183
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits


@@ -48,6 +287,261 @@ Value collapse2DData(RewriterBase , Location loc, 
Value data) {
   reassociation);
 }
 
+// This function transforms the filter. The data layout of the filter is FHWC.
+// The transformation matrix is 2-dimension. We need to extract H x W from
+// FHWC first. We need to generate 2 levels of loops to iterate on F and C.
+// After the transformation, we get
+//
+// scf.for %f = lo_f to hi_f step 1
+//   scf.for %c = lo_c to hi_c step 1
+// %extracted = extract filter from filter
+// %ret = linalg.matmul G, %extracted
+// %ret = linalg.matmul %ret, GT
+// %inserted = insert %ret into filter
+//
+Value filterTransform(RewriterBase , Location loc, Value filter,
+  Value retValue, int64_t m, int64_t r,
+  bool leftTransform = true, bool rightTransform = true) {
+  // Map from (m, r) to G transform matrix.
+  static const llvm::SmallDenseMap
+  GMatrices = {
+  {F_2_3, TransformMatrix(G_2x2_3x3, 4, 3)},
+  {F_4_3, TransformMatrix(G_4x4_3x3, 6, 3)},
+  {F_2_5, TransformMatrix(G_2x2_5x5, 6, 5)},
+  };
+
+  // Map from (m, r) to GT transform matrix.
+  static const llvm::SmallDenseMap
+  GTMatrices = {
+  {F_2_3, TransformMatrix(GT_2x2_3x3, 3, 4)},
+  {F_4_3, TransformMatrix(GT_4x4_3x3, 3, 6)},
+  {F_2_5, TransformMatrix(GT_2x2_5x5, 5, 6)},
+  };
+
+  auto filterType = cast(filter.getType());
+  Type elementType = filterType.getElementType();
+  auto filterShape = filterType.getShape(); // F, H, W, C
+  int64_t filterF = filterShape[0];
+  int64_t filterH = filterShape[1];
+  int64_t filterW = filterShape[2];
+  int64_t filterC = filterShape[3];
+
+  if (filterH != r && filterH != 1)
+return Value();
+  if (filterW != r && filterW != 1)
+return Value();
+
+  // Return shape is 
+  auto zeroIdx = rewriter.create(loc, 0);
+  auto fUpperBound = rewriter.create(loc, filterF);
+  auto cUpperBound = rewriter.create(loc, filterC);
+  auto oneStep = rewriter.create(loc, 1);
+  auto outerForOp =
+  rewriter.create(loc, zeroIdx, fUpperBound, oneStep, 
retValue);
+  Block *outerForBody = outerForOp.getBody();
+  rewriter.setInsertionPointToStart(outerForBody);
+  Value FIter = outerForBody->getArgument(0);

ftynse wrote:

There must be a function on `scf::ForOp` that returns the induction variable 
and avoids magic constant zero here.

https://github.com/llvm/llvm-project/pull/96183
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits


@@ -48,6 +287,261 @@ Value collapse2DData(RewriterBase , Location loc, 
Value data) {
   reassociation);
 }
 
+// This function transforms the filter. The data layout of the filter is FHWC.
+// The transformation matrix is 2-dimension. We need to extract H x W from
+// FHWC first. We need to generate 2 levels of loops to iterate on F and C.
+// After the transformation, we get
+//
+// scf.for %f = lo_f to hi_f step 1
+//   scf.for %c = lo_c to hi_c step 1
+// %extracted = extract filter from filter
+// %ret = linalg.matmul G, %extracted
+// %ret = linalg.matmul %ret, GT
+// %inserted = insert %ret into filter
+//
+Value filterTransform(RewriterBase , Location loc, Value filter,
+  Value retValue, int64_t m, int64_t r,
+  bool leftTransform = true, bool rightTransform = true) {
+  // Map from (m, r) to G transform matrix.
+  static const llvm::SmallDenseMap
+  GMatrices = {
+  {F_2_3, TransformMatrix(G_2x2_3x3, 4, 3)},
+  {F_4_3, TransformMatrix(G_4x4_3x3, 6, 3)},
+  {F_2_5, TransformMatrix(G_2x2_5x5, 6, 5)},
+  };
+
+  // Map from (m, r) to GT transform matrix.
+  static const llvm::SmallDenseMap
+  GTMatrices = {
+  {F_2_3, TransformMatrix(GT_2x2_3x3, 3, 4)},
+  {F_4_3, TransformMatrix(GT_4x4_3x3, 3, 6)},
+  {F_2_5, TransformMatrix(GT_2x2_5x5, 5, 6)},
+  };
+
+  auto filterType = cast(filter.getType());
+  Type elementType = filterType.getElementType();
+  auto filterShape = filterType.getShape(); // F, H, W, C
+  int64_t filterF = filterShape[0];
+  int64_t filterH = filterShape[1];
+  int64_t filterW = filterShape[2];
+  int64_t filterC = filterShape[3];
+
+  if (filterH != r && filterH != 1)
+return Value();
+  if (filterW != r && filterW != 1)
+return Value();
+
+  // Return shape is 
+  auto zeroIdx = rewriter.create(loc, 0);
+  auto fUpperBound = rewriter.create(loc, filterF);
+  auto cUpperBound = rewriter.create(loc, filterC);
+  auto oneStep = rewriter.create(loc, 1);
+  auto outerForOp =
+  rewriter.create(loc, zeroIdx, fUpperBound, oneStep, 
retValue);
+  Block *outerForBody = outerForOp.getBody();
+  rewriter.setInsertionPointToStart(outerForBody);
+  Value FIter = outerForBody->getArgument(0);
+
+  auto innerForOp = rewriter.create(
+  loc, zeroIdx, cUpperBound, oneStep, outerForOp.getRegionIterArgs()[0]);
+  Block *innerForBody = innerForOp.getBody();
+  rewriter.setInsertionPointToStart(innerForBody);
+  Value CIter = innerForBody->getArgument(0);
+
+  // Extract (H, W) from (F, H, W, C)
+  auto extractFilter = extract2DData(
+  rewriter, loc, filter, FIter, CIter, /*outLoopIdx=*/0,
+  /*inLoopIdx=*/3, /*heightIdx=*/1, /*widthIdx=*/2, /*srcSize=*/4);
+
+  TransformMapKeyTy key = {m, r};
+  int64_t retRows = 1;
+  Value matmulRetValue = extractFilter;
+  if (leftTransform) {
+// Get constant transform matrix G
+auto it = GMatrices.find(key);
+if (it == GMatrices.end())
+  return Value();
+const TransformMatrix  = it->second;
+
+retRows = GMatrix.rows;
+auto matmulType = RankedTensorType::get({retRows, filterW}, elementType);
+auto init = rewriter.create(loc, matmulType.getShape(),
+ elementType);
+
+Value G = create2DTransformMatrix(rewriter, loc, GMatrix, elementType);

ftynse wrote:

I wonder if we would rather provide these matrices as global memrefs instead 
of creating them locally every time. Have you considered that?

https://github.com/llvm/llvm-project/pull/96183
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits


@@ -323,5 +1089,12 @@ void populateWinogradConv2DPatterns(RewritePatternSet 
, int64_t m,
   patterns.insert(context, m, r);
 }
 
+void populateDecomposeWinogradOpsPatterns(RewritePatternSet ) {
+  MLIRContext *context = patterns.getContext();
+  patterns.insert(context);
+  patterns.insert(context);
+  patterns.insert(context);

ftynse wrote:

```suggestion
  patterns.insert(context);
```

https://github.com/llvm/llvm-project/pull/96183
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits


@@ -100,6 +594,161 @@ Value matrixMultiply(RewriterBase , Location loc,
   return expandOutput;
 }
 
+// This function transforms the output. The data layout of the output is HWNF.
+// The transformation matrix is 2-dimension. We need to extract H x W from
+// HWNF first. We need to generate 2 levels of loops to iterate on N and F.
+// After the transformation, we get
+//
+// scf.for %n = lo_n to hi_n step 1
+//   scf.for %f = lo_f to hi_f step 1
+// %extracted = extract input from result
+// %ret = linalg.matmul AT, %extracted
+// %ret = linalg.matmul %ret, A
+// %inserted = insert %ret into ret
+//
+Value outputTransform(RewriterBase , Location loc, Value value,
+  Value output, int64_t m, int64_t r,
+  bool leftTransform = true, bool rightTransform = true) {
+  // Map from (m, r) to AT transform matrix.
+  static const llvm::SmallDenseMap
+  ATMatrices = {
+  {F_2_3, TransformMatrix(AT_2x2_3x3, 2, 4)},
+  {F_4_3, TransformMatrix(AT_4x4_3x3, 4, 6, 32)},
+  {F_2_5, TransformMatrix(AT_2x2_5x5, 2, 6, 16)},
+  };
+
+  // Map from (m, r) to A transform matrix.
+  static const llvm::SmallDenseMap
+  AMatrices = {
+  {F_2_3, TransformMatrix(A_2x2_3x3, 4, 2)},
+  {F_4_3, TransformMatrix(A_4x4_3x3, 6, 4, 32)},
+  {F_2_5, TransformMatrix(A_2x2_5x5, 6, 2, 16)},
+  };
+
+  auto valueType = cast(value.getType());
+  Type elementType = valueType.getElementType();
+  auto valueShape = valueType.getShape(); // TileH, TileW, H, W, N, F
+  int64_t valueH = valueShape[2];
+  int64_t valueW = valueShape[3];
+  int64_t valueN = valueShape[4];
+  int64_t valueF = valueShape[5];
+  int64_t alphaH = leftTransform ? m + r - 1 : 1;
+  int64_t alphaW = rightTransform ? m + r - 1 : 1;
+
+  if (valueH != alphaH && valueH != 1)
+return Value();
+  if (valueW != alphaW && valueW != 1)
+return Value();
+
+  auto zeroIdx = rewriter.create(loc, 0);
+  auto nUpperBound = rewriter.create(loc, valueN);
+  auto fUpperBound = rewriter.create(loc, valueF);
+  auto oneStep = rewriter.create(loc, 1);
+
+  auto outerForOp =
+  rewriter.create(loc, zeroIdx, nUpperBound, oneStep, output);
+  Block *outerForBody = outerForOp.getBody();
+  rewriter.setInsertionPointToStart(outerForBody);
+  Value NIter = outerForBody->getArgument(0);
+
+  auto innerForOp = rewriter.create(
+  loc, zeroIdx, fUpperBound, oneStep, outerForOp.getRegionIterArgs()[0]);
+  Block *innerForBody = innerForOp.getBody();
+  rewriter.setInsertionPointToStart(innerForBody);
+  Value FIter = innerForBody->getArgument(0);
+
+  // Extract (H, W) from (1, 1, H, W, N, F)
+  auto extractValue = extract2DData(
+  rewriter, loc, value, NIter, FIter, /*outLoopIdx=*/4,
+  /*inLoopIdx=*/5, /*heightIdx=*/2, /*widthIdx=*/3, /*srcSize=*/6);
+
+  TransformMapKeyTy key = {m, r};
+  int64_t retRows = 1;
+  int64_t retCols = 1;
+  int64_t leftScalarFactor = 1;
+  int64_t rightScalarFactor = 1;
+  Value matmulRetValue = extractValue;
+  if (leftTransform) {
+// Get constant transform matrix AT
+auto it = ATMatrices.find(key);
+if (it == ATMatrices.end())
+  return Value();
+const TransformMatrix  = it->second;
+
+leftScalarFactor = ATMatrix.scalarFactor;
+retRows = ATMatrix.rows;
+auto matmulType = RankedTensorType::get({retRows, valueW}, elementType);
+auto init = rewriter.create(loc, matmulType.getShape(),
+ elementType);
+
+Value AT = create2DTransformMatrix(rewriter, loc, ATMatrix, elementType);
+// Multiply AT x m
+auto matmulOp = rewriter.create(
+loc, matmulType, ValueRange{AT, matmulRetValue}, ValueRange{init});
+matmulRetValue = matmulOp.getResult(0);
+  }
+
+  if (rightTransform) {
+// Get constant transform matrix T
+auto it = AMatrices.find(key);
+if (it == AMatrices.end())
+  return Value();
+const TransformMatrix  = it->second;
+
+rightScalarFactor = AMatrix.scalarFactor;
+auto matmulType =
+RankedTensorType::get({retRows, AMatrix.cols}, elementType);
+retCols = AMatrix.cols;
+auto init = rewriter.create(loc, matmulType.getShape(),
+ elementType);
+
+Value A = create2DTransformMatrix(rewriter, loc, AMatrix, elementType);
+// Multiply y = (AT x m) x A
+auto matmulOp = rewriter.create(
+loc, matmulType, ValueRange{matmulRetValue, A}, ValueRange{init});
+matmulRetValue = matmulOp.getResult(0);
+  }
+
+  // Multiply scalar factor.
+  Value scalarFactor = rewriter.create(
+  loc, FloatAttr::get(elementType, leftScalarFactor * rightScalarFactor));
+  auto matmulType = RankedTensorType::get({retRows, retCols}, elementType);
+  auto init =
+  rewriter.create(loc, matmulType.getShape(), 
elementType);
+
+  auto identityAffineMap = rewriter.getMultiDimIdentityMap(2);
+  SmallVector affineMaps = 

[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits


@@ -100,6 +594,161 @@ Value matrixMultiply(RewriterBase , Location loc,
   return expandOutput;
 }
 
+// This function transforms the output. The data layout of the output is HWNF.
+// The transformation matrix is 2-dimension. We need to extract H x W from
+// HWNF first. We need to generate 2 levels of loops to iterate on N and F.
+// After the transformation, we get
+//
+// scf.for %n = lo_n to hi_n step 1
+//   scf.for %f = lo_f to hi_f step 1
+// %extracted = extract input from result
+// %ret = linalg.matmul AT, %extracted
+// %ret = linalg.matmul %ret, A
+// %inserted = insert %ret into ret
+//
+Value outputTransform(RewriterBase , Location loc, Value value,
+  Value output, int64_t m, int64_t r,
+  bool leftTransform = true, bool rightTransform = true) {
+  // Map from (m, r) to AT transform matrix.
+  static const llvm::SmallDenseMap
+  ATMatrices = {
+  {F_2_3, TransformMatrix(AT_2x2_3x3, 2, 4)},
+  {F_4_3, TransformMatrix(AT_4x4_3x3, 4, 6, 32)},
+  {F_2_5, TransformMatrix(AT_2x2_5x5, 2, 6, 16)},
+  };
+
+  // Map from (m, r) to A transform matrix.
+  static const llvm::SmallDenseMap
+  AMatrices = {
+  {F_2_3, TransformMatrix(A_2x2_3x3, 4, 2)},
+  {F_4_3, TransformMatrix(A_4x4_3x3, 6, 4, 32)},
+  {F_2_5, TransformMatrix(A_2x2_5x5, 6, 2, 16)},
+  };
+
+  auto valueType = cast(value.getType());
+  Type elementType = valueType.getElementType();
+  auto valueShape = valueType.getShape(); // TileH, TileW, H, W, N, F
+  int64_t valueH = valueShape[2];
+  int64_t valueW = valueShape[3];
+  int64_t valueN = valueShape[4];
+  int64_t valueF = valueShape[5];
+  int64_t alphaH = leftTransform ? m + r - 1 : 1;
+  int64_t alphaW = rightTransform ? m + r - 1 : 1;
+
+  if (valueH != alphaH && valueH != 1)
+return Value();
+  if (valueW != alphaW && valueW != 1)
+return Value();
+
+  auto zeroIdx = rewriter.create(loc, 0);
+  auto nUpperBound = rewriter.create(loc, valueN);
+  auto fUpperBound = rewriter.create(loc, valueF);
+  auto oneStep = rewriter.create(loc, 1);
+
+  auto outerForOp =
+  rewriter.create(loc, zeroIdx, nUpperBound, oneStep, output);
+  Block *outerForBody = outerForOp.getBody();
+  rewriter.setInsertionPointToStart(outerForBody);
+  Value NIter = outerForBody->getArgument(0);
+
+  auto innerForOp = rewriter.create(
+  loc, zeroIdx, fUpperBound, oneStep, outerForOp.getRegionIterArgs()[0]);
+  Block *innerForBody = innerForOp.getBody();
+  rewriter.setInsertionPointToStart(innerForBody);
+  Value FIter = innerForBody->getArgument(0);
+
+  // Extract (H, W) from (1, 1, H, W, N, F)
+  auto extractValue = extract2DData(
+  rewriter, loc, value, NIter, FIter, /*outLoopIdx=*/4,
+  /*inLoopIdx=*/5, /*heightIdx=*/2, /*widthIdx=*/3, /*srcSize=*/6);
+
+  TransformMapKeyTy key = {m, r};
+  int64_t retRows = 1;
+  int64_t retCols = 1;
+  int64_t leftScalarFactor = 1;
+  int64_t rightScalarFactor = 1;
+  Value matmulRetValue = extractValue;
+  if (leftTransform) {
+// Get constant transform matrix AT
+auto it = ATMatrices.find(key);
+if (it == ATMatrices.end())
+  return Value();
+const TransformMatrix  = it->second;
+
+leftScalarFactor = ATMatrix.scalarFactor;
+retRows = ATMatrix.rows;
+auto matmulType = RankedTensorType::get({retRows, valueW}, elementType);
+auto init = rewriter.create(loc, matmulType.getShape(),
+ elementType);
+
+Value AT = create2DTransformMatrix(rewriter, loc, ATMatrix, elementType);
+// Multiply AT x m
+auto matmulOp = rewriter.create(
+loc, matmulType, ValueRange{AT, matmulRetValue}, ValueRange{init});
+matmulRetValue = matmulOp.getResult(0);
+  }
+
+  if (rightTransform) {
+// Get constant transform matrix T
+auto it = AMatrices.find(key);
+if (it == AMatrices.end())
+  return Value();
+const TransformMatrix  = it->second;
+
+rightScalarFactor = AMatrix.scalarFactor;
+auto matmulType =
+RankedTensorType::get({retRows, AMatrix.cols}, elementType);
+retCols = AMatrix.cols;
+auto init = rewriter.create(loc, matmulType.getShape(),
+ elementType);
+
+Value A = create2DTransformMatrix(rewriter, loc, AMatrix, elementType);
+// Multiply y = (AT x m) x A
+auto matmulOp = rewriter.create(
+loc, matmulType, ValueRange{matmulRetValue, A}, ValueRange{init});
+matmulRetValue = matmulOp.getResult(0);
+  }
+
+  // Multiply scalar factor.
+  Value scalarFactor = rewriter.create(
+  loc, FloatAttr::get(elementType, leftScalarFactor * rightScalarFactor));
+  auto matmulType = RankedTensorType::get({retRows, retCols}, elementType);
+  auto init =
+  rewriter.create(loc, matmulType.getShape(), 
elementType);
+
+  auto identityAffineMap = rewriter.getMultiDimIdentityMap(2);
+  SmallVector affineMaps = 

[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits


@@ -48,6 +287,261 @@ Value collapse2DData(RewriterBase , Location loc, 
Value data) {
   reassociation);
 }
 
+// This function transforms the filter. The data layout of the filter is FHWC.
+// The transformation matrix is 2-dimension. We need to extract H x W from
+// FHWC first. We need to generate 2 levels of loops to iterate on F and C.
+// After the transformation, we get
+//
+// scf.for %f = lo_f to hi_f step 1
+//   scf.for %c = lo_c to hi_c step 1
+// %extracted = extract filter from filter
+// %ret = linalg.matmul G, %extracted
+// %ret = linalg.matmul %ret, GT
+// %inserted = insert %ret into filter
+//
+Value filterTransform(RewriterBase , Location loc, Value filter,
+  Value retValue, int64_t m, int64_t r,
+  bool leftTransform = true, bool rightTransform = true) {
+  // Map from (m, r) to G transform matrix.
+  static const llvm::SmallDenseMap
+  GMatrices = {
+  {F_2_3, TransformMatrix(G_2x2_3x3, 4, 3)},
+  {F_4_3, TransformMatrix(G_4x4_3x3, 6, 3)},
+  {F_2_5, TransformMatrix(G_2x2_5x5, 6, 5)},
+  };
+
+  // Map from (m, r) to GT transform matrix.
+  static const llvm::SmallDenseMap
+  GTMatrices = {
+  {F_2_3, TransformMatrix(GT_2x2_3x3, 3, 4)},
+  {F_4_3, TransformMatrix(GT_4x4_3x3, 3, 6)},
+  {F_2_5, TransformMatrix(GT_2x2_5x5, 5, 6)},
+  };
+
+  auto filterType = cast(filter.getType());
+  Type elementType = filterType.getElementType();
+  auto filterShape = filterType.getShape(); // F, H, W, C
+  int64_t filterF = filterShape[0];
+  int64_t filterH = filterShape[1];
+  int64_t filterW = filterShape[2];
+  int64_t filterC = filterShape[3];
+
+  if (filterH != r && filterH != 1)
+return Value();
+  if (filterW != r && filterW != 1)
+return Value();
+
+  // Return shape is 
+  auto zeroIdx = rewriter.create(loc, 0);
+  auto fUpperBound = rewriter.create(loc, filterF);
+  auto cUpperBound = rewriter.create(loc, filterC);
+  auto oneStep = rewriter.create(loc, 1);
+  auto outerForOp =
+  rewriter.create(loc, zeroIdx, fUpperBound, oneStep, 
retValue);
+  Block *outerForBody = outerForOp.getBody();
+  rewriter.setInsertionPointToStart(outerForBody);
+  Value FIter = outerForBody->getArgument(0);
+
+  auto innerForOp = rewriter.create(
+  loc, zeroIdx, cUpperBound, oneStep, outerForOp.getRegionIterArgs()[0]);

ftynse wrote:

Ditto: there must be a better-named function for this.

https://github.com/llvm/llvm-project/pull/96183
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits


@@ -36,6 +189,92 @@ constexpr TransformMapKeyTy F_2_3{2, 3};
 constexpr TransformMapKeyTy F_4_3{4, 3};
 constexpr TransformMapKeyTy F_2_5{2, 5};
 
+struct TransformMatrix {

ftynse wrote:

Please document top-level entities.

https://github.com/llvm/llvm-project/pull/96183
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)

2024-06-24 Thread Oleksandr Alex Zinenko via llvm-branch-commits


@@ -23,6 +26,156 @@ namespace linalg {
 
 namespace {
 
+// clang-format off
+// Winograd Conv2D uses a minimal 2D filtering algorithm to calculate its
+// result. The formula of minimal 2D filtering algorithm F(m x m, r x r),
+// m is the output dimension and r is the filter dimension, is
+//
+// Y = A^T x [ (G x g x G^T) x (B^T x d x B) ] x A
+//
+// g is filter and d is input data. We need to prepare 6 constant
+// transformation matrices, G, G^T, B^T, B, A^T, and A for this formula.
+//
+// The following tables define these constant transformation matrices for
+// F(2 x 2, 3 x 3), F(4 x 4, 3 x 3), and F(2 x 2, 5 x 5)
+constexpr float G_2x2_3x3[] = {
+   -1, 0,   0,
+ 1./2, -1./2, 1./2,
+ 1./2,  1./2, 1./2,
+0, 0,1
+};
+
+constexpr float GT_2x2_3x3[] = {
+   -1,  1./2, 1./2, 0,
+0, -1./2, 1./2, 0,
+0,  1./2, 1./2, 1
+};

ftynse wrote:

Have you considered introducing a (potentially `constexpr`) transpose function 
or some sort of transposed access iterator instead of hardcoding transposed 
matrices?

https://github.com/llvm/llvm-project/pull/96183
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for global atomic fadd denormal support (PR #96443)

2024-06-24 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/96443

>From f99d34b66486a17e2fe70d372d67fbabde82d5fb Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sun, 23 Jun 2024 16:44:08 +0200
Subject: [PATCH 1/2] AMDGPU: Add subtarget feature for global atomic fadd
 denormal support

Not sure what the behavior for gfx90a is. The SPG says it always flushes.
The instruction documentation says it does not.
---
 llvm/lib/Target/AMDGPU/AMDGPU.td  | 13 +++--
 llvm/lib/Target/AMDGPU/GCNSubtarget.h |  7 +++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 7ff861f5b144d..5f798b4391704 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst
   "Has flat_atomic_add_f32 instruction"
 >;
 
+def FeatureMemoryAtomicFaddF32DenormalSupport
+  : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support",
+  "HasAtomicMemoryAtomicFaddF32DenormalSupport",
+  "true",
+  "global/flat/buffer atomic fadd for float supports denormal handling"
+>;
+
 def FeatureAgentScopeFineGrainedRemoteMemoryAtomics
   : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics",
   "HasAgentScopeFineGrainedRemoteMemoryAtomics",
@@ -1425,7 +1432,8 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureKernargPreload,
FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF64FlatInsts,
-   FeatureAgentScopeFineGrainedRemoteMemoryAtomics
+   FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
+   FeatureMemoryAtomicFaddF32DenormalSupport
]>;
 
 def FeatureISAVersion9_4_0 : FeatureSet<
@@ -1628,7 +1636,8 @@ def FeatureISAVersion12 : FeatureSet<
FeatureVGPRSingleUseHintInsts,
FeatureScalarDwordx3Loads,
FeatureDPPSrc1SGPR,
-   FeatureMaxHardClauseLength32]>;
+   FeatureMaxHardClauseLength32,
+   FeatureMemoryAtomicFaddF32DenormalSupport]>;
 
 def FeatureISAVersion12_Generic: FeatureSet<
   !listconcat(FeatureISAVersion12.Features,
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h 
b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index c40efbdcf7f0b..674d84422538f 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -167,6 +167,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool HasAtomicFlatPkAdd16Insts = false;
   bool HasAtomicFaddRtnInsts = false;
   bool HasAtomicFaddNoRtnInsts = false;
+  bool HasAtomicMemoryAtomicFaddF32DenormalSupport = false;
   bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;
   bool HasAtomicBufferGlobalPkAddF16Insts = false;
   bool HasAtomicCSubNoRtnInsts = false;
@@ -872,6 +873,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
 
   bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
 
+  /// \return true if the target's flat, global, and buffer atomic fadd for
+  /// float supports denormal handling.
+  bool hasMemoryAtomicFaddF32DenormalSupport() const {
+return HasAtomicMemoryAtomicFaddF32DenormalSupport;
+  }
+
   /// \return true if atomic operations targeting fine-grained memory work
   /// correctly at device scope, in allocations in host or peer PCIe device
   /// memory.

>From 45941357740eb44a7c5828bf7c534aef65448226 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Mon, 24 Jun 2024 12:10:37 +0200
Subject: [PATCH 2/2] Add to gfx11.

RDNA 3 manual says "Floating-point addition handles NAN/INF/denorm"
though I'm not sure I trust it.
---
 llvm/lib/Target/AMDGPU/AMDGPU.td | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 5f798b4391704..0ec65f759bc35 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1545,7 +1545,8 @@ def FeatureISAVersion11_Common : FeatureSet<
FeatureFlatAtomicFaddF32Inst,
FeatureImageInsts,
FeaturePackedTID,
-   FeatureVcmpxPermlaneHazard]>;
+   FeatureVcmpxPermlaneHazard,
+   FeatureMemoryAtomicFaddF32DenormalSupport]>;
 
 // There are few workarounds that need to be
 // added to all targets. This pessimizes codegen

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)

2024-06-24 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> Actually not, we do not know the bus. Moreover, we know this is opposite.

On gfx940/gfx12, we don't need to know the bus to handle the agent scope case 
correctly. The instructions still function, just always at device scope. We do 
need to know the bus and/or location to handle the system scope correctly, 
which will come from the new metadata.

https://github.com/llvm/llvm-project/pull/96442
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)

2024-06-24 Thread Stanislav Mekhanoshin via llvm-branch-commits


@@ -1701,17 +1732,33 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const 
CombineInfo ,
   return AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM;
 }
   case S_LOAD_IMM:
-switch (Width) {
-default:
-  return 0;
-case 2:
-  return AMDGPU::S_LOAD_DWORDX2_IMM;
-case 3:
-  return AMDGPU::S_LOAD_DWORDX3_IMM;
-case 4:
-  return AMDGPU::S_LOAD_DWORDX4_IMM;
-case 8:
-  return AMDGPU::S_LOAD_DWORDX8_IMM;
+// For targets that support XNACK replay, use the constrained load opcode.
+if (STI && STI->hasXnackReplay()) {
+  switch (Width) {

rampitec wrote:

> > currently the alignment is picked from the first MMO and that'd definitely 
> > be smaller than the natural align requirement for the new load
> 
> You don't know that - the alignment in the first MMO will be whatever 
> alignment the compiler could deduce, which could be large, e.g. if the 
> pointer used for the first load was known to have a large alignment.

Moreover, it can easily be as large as a page, e.g. in the case of a scalar 
load from kernarg.

https://github.com/llvm/llvm-project/pull/96162
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)

2024-06-24 Thread Stanislav Mekhanoshin via llvm-branch-commits

rampitec wrote:

> We do statically know for some of the targets (mostly gfx12 and gfx940) that 
> it's supposed to work. This is the "scope downgrade" vs. "nop" cases in the 
> atomic support table

Actually not, we do not know the bus. Moreover, we know this is opposite.

https://github.com/llvm/llvm-project/pull/96442
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)

2024-06-24 Thread Christudasan Devadasan via llvm-branch-commits


@@ -1701,17 +1732,33 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const 
CombineInfo ,
   return AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM;
 }
   case S_LOAD_IMM:
-switch (Width) {
-default:
-  return 0;
-case 2:
-  return AMDGPU::S_LOAD_DWORDX2_IMM;
-case 3:
-  return AMDGPU::S_LOAD_DWORDX3_IMM;
-case 4:
-  return AMDGPU::S_LOAD_DWORDX4_IMM;
-case 8:
-  return AMDGPU::S_LOAD_DWORDX8_IMM;
+// For targets that support XNACK replay, use the constrained load opcode.
+if (STI && STI->hasXnackReplay()) {
+  switch (Width) {

cdevadas wrote:

> > currently the alignment is picked from the first MMO and that'd definitely 
> > be smaller than the natural align requirement for the new load
> 
> You don't know that - the alignment in the first MMO will be whatever 
> alignment the compiler could deduce, which could be large, e.g. if the 
> pointer used for the first load was known to have a large alignment.

Are you suggesting that we check the alignment in the first MMO to see whether 
it still satisfies the preferred alignment for the merged load, and use the 
_ec variant only if the alignment is found to be smaller than the expected value?

https://github.com/llvm/llvm-project/pull/96162
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)

2024-06-24 Thread Jay Foad via llvm-branch-commits

https://github.com/jayfoad edited 
https://github.com/llvm/llvm-project/pull/96162
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)

2024-06-24 Thread Jay Foad via llvm-branch-commits


@@ -1701,17 +1732,33 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const 
CombineInfo ,
   return AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM;
 }
   case S_LOAD_IMM:
-switch (Width) {
-default:
-  return 0;
-case 2:
-  return AMDGPU::S_LOAD_DWORDX2_IMM;
-case 3:
-  return AMDGPU::S_LOAD_DWORDX3_IMM;
-case 4:
-  return AMDGPU::S_LOAD_DWORDX4_IMM;
-case 8:
-  return AMDGPU::S_LOAD_DWORDX8_IMM;
+// For targets that support XNACK replay, use the constrained load opcode.
+if (STI && STI->hasXnackReplay()) {
+  switch (Width) {

jayfoad wrote:

> currently the alignment is picked from the first MMO and that'd definitely be 
> smaller than the natural align requirement for the new load

You don't know that - the alignment in the first MMO will be whatever alignment 
the compiler could deduce, which could be large.

https://github.com/llvm/llvm-project/pull/96162
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Codegen support for constrained multi-dword sloads (PR #96163)

2024-06-24 Thread Jay Foad via llvm-branch-commits


@@ -867,13 +867,104 @@ def SMRDBufferImm   : ComplexPattern;
 def SMRDBufferImm32 : ComplexPattern;
 def SMRDBufferSgprImm : ComplexPattern;
 
+class SMRDAlignedLoadPat : PatFrag <(ops node:$ptr), (Op 
node:$ptr), [{
+  // Returns true if it is a naturally aligned multi-dword load.
+  LoadSDNode *Ld = cast(N);
+  unsigned Size = Ld->getMemoryVT().getStoreSize();
+  return (Size <= 4) || (Ld->getAlign().value() >= PowerOf2Ceil(Size));

jayfoad wrote:

`Ld->getAlign().value()` will never be 12. There's no such thing as a 
non-power-of-two alignment.

https://github.com/llvm/llvm-project/pull/96163
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits