[llvm-branch-commits] [llvm] [Remarks] Auto-detect remark parser format (PR #144554)

2025-06-17 Thread Tobias Stadler via llvm-branch-commits

https://github.com/tobias-stadler updated 
https://github.com/llvm/llvm-project/pull/144554

>From a428e237fcc52830549144bf3afdcddb29742b0d Mon Sep 17 00:00:00 2001
From: Tobias Stadler 
Date: Mon, 16 Jun 2025 15:32:15 +0100
Subject: [PATCH 1/2] [Remarks] Auto-detect remark parser format

Add remark format 'Auto', which performs automatic detection of the
remark format using the magic numbers at the beginning of the remarks
files.

The RemarkLinker already did something similar, so we streamlined this
and exposed this to llvm-remarkutil.

Depends on #144527
---
 llvm/include/llvm/Remarks/RemarkFormat.h  |  5 -
 llvm/include/llvm/Remarks/RemarkLinker.h  |  5 ++---
 llvm/lib/Remarks/RemarkFormat.cpp | 18 +++-
 llvm/lib/Remarks/RemarkLinker.cpp | 14 +++--
 llvm/lib/Remarks/RemarkParser.cpp | 21 +--
 llvm/lib/Remarks/RemarkSerializer.cpp |  6 --
 .../Inputs/broken-remark-magic.bitstream  |  1 +
 .../llvm-remarkutil/annotation-count.test |  2 ++
 .../broken-bitstream-remark-magic.test|  6 ++
 .../tools/llvm-remarkutil/empty-file.test |  5 +
 .../llvm-remarkutil/instruction-count.test|  4 +++-
 .../llvm-remarkutil/instruction-mix.test  |  4 +++-
 .../size-diff/no-difference.test  |  3 +++
 .../tools/llvm-remarkutil/RemarkUtilHelpers.h |  9 +---
 llvm/unittests/Remarks/RemarksLinkingTest.cpp |  4 +---
 15 files changed, 75 insertions(+), 32 deletions(-)
 create mode 100644 llvm/test/tools/llvm-remarkutil/Inputs/broken-remark-magic.bitstream
 create mode 100644 llvm/test/tools/llvm-remarkutil/broken-bitstream-remark-magic.test

diff --git a/llvm/include/llvm/Remarks/RemarkFormat.h b/llvm/include/llvm/Remarks/RemarkFormat.h
index a39a013dcf905..eda201d4ee6f1 100644
--- a/llvm/include/llvm/Remarks/RemarkFormat.h
+++ b/llvm/include/llvm/Remarks/RemarkFormat.h
@@ -23,7 +23,7 @@ namespace remarks {
 constexpr StringLiteral Magic("REMARKS");
 
 /// The format used for serializing/deserializing remarks.
-enum class Format { Unknown, YAML, Bitstream };
+enum class Format { Unknown, Auto, YAML, Bitstream };
 
 /// Parse and validate a string for the remark format.
 LLVM_ABI Expected<Format> parseFormat(StringRef FormatStr);
@@ -31,6 +31,9 @@ LLVM_ABI Expected<Format> parseFormat(StringRef FormatStr);
 /// Parse and validate a magic number to a remark format.
 LLVM_ABI Expected<Format> magicToFormat(StringRef Magic);
 
+/// Detect format based on selected format and magic number
+LLVM_ABI Expected<Format> detectFormat(Format Selected, StringRef Magic);
+
 } // end namespace remarks
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/Remarks/RemarkLinker.h b/llvm/include/llvm/Remarks/RemarkLinker.h
index 5343c62144708..67208f40592a5 100644
--- a/llvm/include/llvm/Remarks/RemarkLinker.h
+++ b/llvm/include/llvm/Remarks/RemarkLinker.h
@@ -80,13 +80,12 @@ struct RemarkLinker {
   /// \p Buffer.
   /// \p Buffer can be either a standalone remark container or just
   /// metadata. This takes care of uniquing and merging the remarks.
-  LLVM_ABI Error link(StringRef Buffer,
-  std::optional<Format> RemarkFormat = std::nullopt);
+  LLVM_ABI Error link(StringRef Buffer, Format RemarkFormat = Format::Auto);
 
   /// Link the remarks found in \p Obj by looking for the right section and
   /// calling the method above.
   LLVM_ABI Error link(const object::ObjectFile &Obj,
-  std::optional<Format> RemarkFormat = std::nullopt);
+  Format RemarkFormat = Format::Auto);
 
   /// Serialize the linked remarks to the stream \p OS, using the format \p
   /// RemarkFormat.
diff --git a/llvm/lib/Remarks/RemarkFormat.cpp b/llvm/lib/Remarks/RemarkFormat.cpp
index 800f5bffe70da..1c52e352f9392 100644
--- a/llvm/lib/Remarks/RemarkFormat.cpp
+++ b/llvm/lib/Remarks/RemarkFormat.cpp
@@ -42,6 +42,22 @@ Expected<Format> llvm::remarks::magicToFormat(StringRef MagicStr) {
 
   if (Result == Format::Unknown)
 return createStringError(std::make_error_code(std::errc::invalid_argument),
- "Unknown remark magic: '%s'", MagicStr.data());
+ "Automatic detection of remark format failed. "
+ "Unknown magic number: '%.4s'",
+ MagicStr.data());
   return Result;
 }
+
+Expected<Format> llvm::remarks::detectFormat(Format Selected,
+ StringRef MagicStr) {
+  if (Selected == Format::Unknown)
+return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Unknown remark parser format.");
+  if (Selected != Format::Auto)
+return Selected;
+
+  // Empty files are valid bitstream files
+  if (MagicStr.empty())
+return Format::Bitstream;
+  return magicToFormat(MagicStr);
+}
diff --git a/llvm/lib/Remarks/RemarkLinker.cpp b/llvm/lib/Remarks/RemarkLinker.cpp
index b8395aa135d82..0ca6217edfddd 100644
--- a/llvm/lib

[llvm-branch-commits] [llvm] [Remarks] Auto-detect remark parser format (PR #144554)

2025-06-17 Thread Jon Roelofs via llvm-branch-commits

https://github.com/jroelofs approved this pull request.

Love it!

https://github.com/llvm/llvm-project/pull/144554
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Remarks] Auto-detect remark parser format (PR #144554)

2025-06-17 Thread Tobias Stadler via llvm-branch-commits

https://github.com/tobias-stadler created 
https://github.com/llvm/llvm-project/pull/144554

Add remark format 'Auto', which performs automatic detection of the remark 
format using the magic numbers at the beginning of the remarks files.

The RemarkLinker already did something similar, so we streamlined this and 
exposed this to llvm-remarkutil.

Depends on #144527

>From a428e237fcc52830549144bf3afdcddb29742b0d Mon Sep 17 00:00:00 2001
From: Tobias Stadler 
Date: Mon, 16 Jun 2025 15:32:15 +0100
Subject: [PATCH] [Remarks] Auto-detect remark parser format

Add remark format 'Auto', which performs automatic detection of the
remark format using the magic numbers at the beginning of the remarks
files.

The RemarkLinker already did something similar, so we streamlined this
and exposed this to llvm-remarkutil.

Depends on #144527
---
 llvm/include/llvm/Remarks/RemarkFormat.h  |  5 -
 llvm/include/llvm/Remarks/RemarkLinker.h  |  5 ++---
 llvm/lib/Remarks/RemarkFormat.cpp | 18 +++-
 llvm/lib/Remarks/RemarkLinker.cpp | 14 +++--
 llvm/lib/Remarks/RemarkParser.cpp | 21 +--
 llvm/lib/Remarks/RemarkSerializer.cpp |  6 --
 .../Inputs/broken-remark-magic.bitstream  |  1 +
 .../llvm-remarkutil/annotation-count.test |  2 ++
 .../broken-bitstream-remark-magic.test|  6 ++
 .../tools/llvm-remarkutil/empty-file.test |  5 +
 .../llvm-remarkutil/instruction-count.test|  4 +++-
 .../llvm-remarkutil/instruction-mix.test  |  4 +++-
 .../size-diff/no-difference.test  |  3 +++
 .../tools/llvm-remarkutil/RemarkUtilHelpers.h |  9 +---
 llvm/unittests/Remarks/RemarksLinkingTest.cpp |  4 +---
 15 files changed, 75 insertions(+), 32 deletions(-)
 create mode 100644 llvm/test/tools/llvm-remarkutil/Inputs/broken-remark-magic.bitstream
 create mode 100644 llvm/test/tools/llvm-remarkutil/broken-bitstream-remark-magic.test

diff --git a/llvm/include/llvm/Remarks/RemarkFormat.h b/llvm/include/llvm/Remarks/RemarkFormat.h
index a39a013dcf905..eda201d4ee6f1 100644
--- a/llvm/include/llvm/Remarks/RemarkFormat.h
+++ b/llvm/include/llvm/Remarks/RemarkFormat.h
@@ -23,7 +23,7 @@ namespace remarks {
 constexpr StringLiteral Magic("REMARKS");
 
 /// The format used for serializing/deserializing remarks.
-enum class Format { Unknown, YAML, Bitstream };
+enum class Format { Unknown, Auto, YAML, Bitstream };
 
 /// Parse and validate a string for the remark format.
 LLVM_ABI Expected<Format> parseFormat(StringRef FormatStr);
@@ -31,6 +31,9 @@ LLVM_ABI Expected<Format> parseFormat(StringRef FormatStr);
 /// Parse and validate a magic number to a remark format.
 LLVM_ABI Expected<Format> magicToFormat(StringRef Magic);
 
+/// Detect format based on selected format and magic number
+LLVM_ABI Expected<Format> detectFormat(Format Selected, StringRef Magic);
+
 } // end namespace remarks
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/Remarks/RemarkLinker.h b/llvm/include/llvm/Remarks/RemarkLinker.h
index 5343c62144708..67208f40592a5 100644
--- a/llvm/include/llvm/Remarks/RemarkLinker.h
+++ b/llvm/include/llvm/Remarks/RemarkLinker.h
@@ -80,13 +80,12 @@ struct RemarkLinker {
   /// \p Buffer.
   /// \p Buffer can be either a standalone remark container or just
   /// metadata. This takes care of uniquing and merging the remarks.
-  LLVM_ABI Error link(StringRef Buffer,
-  std::optional<Format> RemarkFormat = std::nullopt);
+  LLVM_ABI Error link(StringRef Buffer, Format RemarkFormat = Format::Auto);
 
   /// Link the remarks found in \p Obj by looking for the right section and
   /// calling the method above.
   LLVM_ABI Error link(const object::ObjectFile &Obj,
-  std::optional<Format> RemarkFormat = std::nullopt);
+  Format RemarkFormat = Format::Auto);
 
   /// Serialize the linked remarks to the stream \p OS, using the format \p
   /// RemarkFormat.
diff --git a/llvm/lib/Remarks/RemarkFormat.cpp b/llvm/lib/Remarks/RemarkFormat.cpp
index 800f5bffe70da..1c52e352f9392 100644
--- a/llvm/lib/Remarks/RemarkFormat.cpp
+++ b/llvm/lib/Remarks/RemarkFormat.cpp
@@ -42,6 +42,22 @@ Expected<Format> llvm::remarks::magicToFormat(StringRef MagicStr) {
 
   if (Result == Format::Unknown)
 return createStringError(std::make_error_code(std::errc::invalid_argument),
- "Unknown remark magic: '%s'", MagicStr.data());
+ "Automatic detection of remark format failed. "
+ "Unknown magic number: '%.4s'",
+ MagicStr.data());
   return Result;
 }
+
+Expected<Format> llvm::remarks::detectFormat(Format Selected,
+ StringRef MagicStr) {
+  if (Selected == Format::Unknown)
+return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Unknown remark parser format.");
+  if (Selected != Format::Auto)
+return Selected;
+
+  //