This is an automated email from the ASF dual-hosted git repository.

lordgamez pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git

commit 6e8114aa9fd60e472ec0905767e911779a5ffc7b
Author: Ferenc Gerlits <[email protected]>
AuthorDate: Tue Jun 17 09:28:42 2025 +0200

    MINIFICPP-2576 Improve CWEL performance
    
    - add caching to WindowsEventLogMetadata::getEventData()
    - replace regex lookup with endsWith
    - add CMakeSettings.json to .gitignore
    - use a defaulted == operator instead of hand-rolling it
    
    Signed-off-by: Gabor Gyimesi <[email protected]>
    
    This closes #1985
---
 .gitignore                                         |  1 +
 .../windows-event-log/ConsumeWindowsEventLog.cpp   | 33 +++++----
 .../windows-event-log/ConsumeWindowsEventLog.h     |  4 +-
 .../tests/ConsumeWindowsEventLogTests.cpp          | 24 ++++++
 .../tests/MetadataWalkerTests.cpp                  | 86 ++++++++++++----------
 .../windows-event-log/wel/MetadataWalker.cpp       |  9 +--
 extensions/windows-event-log/wel/MetadataWalker.h  |  9 +--
 .../windows-event-log/wel/WindowsEventLog.cpp      | 46 ++++++++----
 extensions/windows-event-log/wel/WindowsEventLog.h | 44 +++++++++--
 9 files changed, 171 insertions(+), 85 deletions(-)

diff --git a/.gitignore b/.gitignore
index 8a57ba1c0..0230c6b91 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,6 +39,7 @@ assemblies
 CMakeCache.txt
 CMakeFiles
 CMakeScripts
+CMakeSettings.json
 cmake_install.cmake
 install_manifest.txt
 CTestTestfile.cmake
diff --git a/extensions/windows-event-log/ConsumeWindowsEventLog.cpp b/extensions/windows-event-log/ConsumeWindowsEventLog.cpp
index 24ed13c25..e5bce24e3 100644
--- a/extensions/windows-event-log/ConsumeWindowsEventLog.cpp
+++ b/extensions/windows-event-log/ConsumeWindowsEventLog.cpp
@@ -23,15 +23,10 @@
 #include <vector>
 #include <tuple>
 #include <utility>
-#include <queue>
 #include <map>
-#include <set>
 #include <sstream>
 #include <string>
-#include <iostream>
 #include <memory>
-#include <regex>
-#include <cinttypes>
 
 #include "wel/LookupCacher.h"
 #include "wel/MetadataWalker.h"
@@ -49,6 +44,8 @@
 
 #include "utils/gsl.h"
 #include "utils/OsUtils.h"
+#include "utils/RegexUtils.h"
+#include "utils/StringUtils.h"
 #include "utils/UnicodeConversion.h"
 #include "utils/ProcessorConfigUtils.h"
 
@@ -140,11 +137,7 @@ void ConsumeWindowsEventLog::onSchedule(core::ProcessContext& context, core::Pro
     }
   }
 
-  regex_.reset();
-  if (auto identifier_matcher = context.getProperty(IdentifierMatcher); 
identifier_matcher && !identifier_matcher->empty()) {
-    regex_.emplace(*identifier_matcher);
-  }
-
+  sid_matcher_ = cwel::parseSidMatcher(utils::parseOptionalProperty(context, 
IdentifierMatcher));
   output_format_ = utils::parseEnumProperty<cwel::OutputFormat>(context, 
OutputFormatProperty);
   json_format_ = utils::parseEnumProperty<cwel::JsonFormat>(context, 
JsonFormatProperty);
 
@@ -195,6 +188,20 @@ void ConsumeWindowsEventLog::onSchedule(core::ProcessContext& context, core::Pro
   logger_->log_trace("Successfully configured CWEL");
 }
 
+std::function<bool(std::string_view)> cwel::parseSidMatcher(const 
std::optional<std::string>& sid_matcher) {
+  if (!sid_matcher || sid_matcher->empty()) {
+    return [](std::string_view){ return false; };
+  }
+
+  if (std::smatch match; utils::regexMatch(*sid_matcher, match, 
utils::Regex{R"_(\.\*(\w+))_"})) {
+    std::string suffix = match[1];
+    return [suffix](std::string_view field_name) { return 
utils::string::endsWith(field_name, suffix); };
+  }
+
+  utils::Regex sid_matcher_regex{*sid_matcher};
+  return [sid_matcher_regex](std::string_view field_name) { return 
utils::regexMatch(field_name, sid_matcher_regex); };
+}
+
 bool ConsumeWindowsEventLog::commitAndSaveBookmark(const std::wstring 
&bookmark_xml, core::ProcessContext& context, core::ProcessSession& session) {
   {
     const TimeDiff time_diff;
@@ -323,7 +330,7 @@ wel::WindowsEventLogHandler& ConsumeWindowsEventLog::getEventLogHandler(const st
   auto opened_publisher_metadata_provider = EvtOpenPublisherMetadata(nullptr, 
widechar, nullptr, 0, 0);
   if (!opened_publisher_metadata_provider)
     logger_->log_warn("EvtOpenPublisherMetadata failed due to {}", 
utils::OsUtils::windowsErrorToErrorCode(GetLastError()).message());
-  providers_[name] = 
wel::WindowsEventLogHandler(opened_publisher_metadata_provider);
+  providers_.emplace(name, opened_publisher_metadata_provider);
   logger_->log_info("Handler not found for {}, creating. Number of cached 
handlers: {}", name, providers_.size());
   return providers_[name];
 }
@@ -459,8 +466,8 @@ nonstd::expected<cwel::EventRender, std::string> ConsumeWindowsEventLog::createE
 
   // this is a well known path.
   std::string provider_name = 
doc.child("Event").child("System").child("Provider").attribute("Name").value();
-  wel::WindowsEventLogMetadataImpl 
metadata{getEventLogHandler(provider_name).getMetadata(), hEvent};
-  wel::MetadataWalker walker{metadata, path_.str(), !resolve_as_attributes_, 
apply_identifier_function_, regex_ ? &*regex_ : nullptr, 
userIdToUsernameFunction()};
+  wel::WindowsEventLogMetadataImpl metadata{getEventLogHandler(provider_name), 
hEvent};
+  wel::MetadataWalker walker{metadata, path_.str(), !resolve_as_attributes_, 
apply_identifier_function_, sid_matcher_, userIdToUsernameFunction()};
 
   // resolve the event metadata
   doc.traverse(walker);
diff --git a/extensions/windows-event-log/ConsumeWindowsEventLog.h b/extensions/windows-event-log/ConsumeWindowsEventLog.h
index 263e9902c..3c48a3220 100644
--- a/extensions/windows-event-log/ConsumeWindowsEventLog.h
+++ b/extensions/windows-event-log/ConsumeWindowsEventLog.h
@@ -74,6 +74,8 @@ enum class JsonFormat {
   Simple,
   Flattened,
 };
+
+std::function<bool(std::string_view)> parseSidMatcher(const 
std::optional<std::string>& sid_matcher);
 }  // namespace cwel
 
 class Bookmark;
@@ -242,7 +244,7 @@ class ConsumeWindowsEventLog : public core::ProcessorImpl {
   std::optional<std::string> header_delimiter_;
   wel::EventPath path_;
   std::wstring wstr_query_;
-  std::optional<utils::Regex> regex_;
+  std::function<bool(std::string_view)> sid_matcher_;
   bool resolve_as_attributes_{false};
   bool apply_identifier_function_{false};
   std::string provenanceUri_;
diff --git a/extensions/windows-event-log/tests/ConsumeWindowsEventLogTests.cpp b/extensions/windows-event-log/tests/ConsumeWindowsEventLogTests.cpp
index f8b66e3ae..353e91555 100644
--- a/extensions/windows-event-log/tests/ConsumeWindowsEventLogTests.cpp
+++ b/extensions/windows-event-log/tests/ConsumeWindowsEventLogTests.cpp
@@ -541,4 +541,28 @@ TEST_CASE("ConsumeWindowsEventLog can process events from a log file", "[cwel][l
   )json");
 }
 
+TEST_CASE("ConsumeWindowsEventLog::parseSidMatcher works correctly") {
+  const auto verify = [](const std::optional<std::string>& sid_matcher, 
std::string_view field_name, bool result) {
+    const auto matcher_function = 
minifi::processors::cwel::parseSidMatcher(sid_matcher);
+    CHECK(matcher_function(field_name) == result);
+  };
+
+  verify(std::nullopt, "UserSid", false);
+  verify(std::nullopt, "WidthOfSidewalk", false);
+  verify(std::nullopt, "Channel", false);
+
+  verify(".*Sid", "UserSid", true);
+  verify(".*Sid", "WidthOfSidewalk", false);
+  verify(".*Sid", "Channel", false);
+
+  verify(".*Sid.*", "UserSid", true);
+  verify(".*Sid.*", "WidthOfSidewalk", true);
+  verify(".*Sid.*", "Channel", false);
+
+  verify("Sid", "UserSid", false);
+  verify("Sid", "WidthOfSidewalk", false);
+  verify("Sid", "Channel", false);
+  verify("Sid", "Sid", true);
+}
+
 }  // namespace org::apache::nifi::minifi::test
diff --git a/extensions/windows-event-log/tests/MetadataWalkerTests.cpp b/extensions/windows-event-log/tests/MetadataWalkerTests.cpp
index 300b8009a..678e0c768 100644
--- a/extensions/windows-event-log/tests/MetadataWalkerTests.cpp
+++ b/extensions/windows-event-log/tests/MetadataWalkerTests.cpp
@@ -19,6 +19,7 @@
 #include <map>
 #include <string>
 
+#include "ConsumeWindowsEventLog.h"
 #include "unit/TestBase.h"
 #include "unit/Catch.h"
 #include "core/Core.h"
@@ -29,15 +30,20 @@
 
 using METADATA = org::apache::nifi::minifi::wel::METADATA;
 using MetadataWalker = org::apache::nifi::minifi::wel::MetadataWalker;
+using WindowsEventLogHandler = 
org::apache::nifi::minifi::wel::WindowsEventLogHandler;
 using WindowsEventLogMetadata = 
org::apache::nifi::minifi::wel::WindowsEventLogMetadata;
 using WindowsEventLogMetadataImpl = 
org::apache::nifi::minifi::wel::WindowsEventLogMetadataImpl;
 using XmlString = org::apache::nifi::minifi::wel::XmlString;
 
 namespace {
 
-std::string updateXmlMetadata(const std::string &xml, EVT_HANDLE metadata_ptr, 
EVT_HANDLE event_ptr, bool update_xml, bool resolve, utils::Regex const* regex 
= nullptr) {
-  WindowsEventLogMetadataImpl metadata{metadata_ptr, event_ptr};
-  MetadataWalker walker(metadata, "", update_xml, resolve, regex, 
&utils::OsUtils::userIdToUsername);
+auto none_matcher = minifi::processors::cwel::parseSidMatcher(std::nullopt);
+
+std::string updateXmlMetadata(const std::string &xml, EVT_HANDLE metadata_ptr, 
EVT_HANDLE event_ptr, bool update_xml, bool resolve,
+    const std::function<bool(std::string_view)>& sid_matcher = none_matcher) {
+  WindowsEventLogHandler provider{metadata_ptr};
+  WindowsEventLogMetadataImpl metadata{provider, event_ptr};
+  MetadataWalker walker(metadata, "", update_xml, resolve, sid_matcher, 
&utils::OsUtils::userIdToUsername);
 
   pugi::xml_document doc;
   pugi::xml_parse_result result = doc.load_string(xml.c_str());
@@ -74,7 +80,9 @@ const short event_type_index = 178;  // NOLINT short comes from WINDOWS API
 
 class FakeWindowsEventLogMetadata : public WindowsEventLogMetadata {
  public:
-  [[nodiscard]] std::string getEventData(EVT_FORMAT_MESSAGE_FLAGS flags) const 
override { return "event_data_for_flag_" + std::to_string(flags); }
+  [[nodiscard]] std::string getEventData(EVT_FORMAT_MESSAGE_FLAGS field, const 
std::string&) const override {
+    return "event_data_for_field_" + std::to_string(field);
+  }
   [[nodiscard]] std::string getEventTimestamp() const override { return 
"event_timestamp"; }
   short getEventTypeIndex() const override { return event_type_index; }  // 
NOLINT short comes from WINDOWS API
 };
@@ -90,8 +98,8 @@ TEST_CASE("MetadataWalker updates the Sid in the XML if both update_xml and reso
 
   SECTION("Resolve nobody") {
     std::string nobody = readFile("resources/withsids.xml");
-    auto regex = utils::Regex(".*Sid");
-    REQUIRE(updateXmlMetadata(xml, nullptr, nullptr, true, true, &regex) == 
formatXml(nobody));
+    const auto sid_matcher = 
minifi::processors::cwel::parseSidMatcher(".*Sid");
+    REQUIRE(updateXmlMetadata(xml, nullptr, nullptr, true, true, sid_matcher) 
== formatXml(nobody));
   }
 }
 
@@ -105,8 +113,8 @@ TEST_CASE("MetadataWalker updates the Security/UserId attribute", "[updateXmlMet
 
   SECTION("Resolve nobody") {
     std::string nobody = readFile("resources/resolveduserid.xml");
-    auto regex = utils::Regex("(.*Sid)|UserID");
-    REQUIRE(updateXmlMetadata(xml, nullptr, nullptr, true, true, &regex) == 
formatXml(nobody));
+    const auto sid_matcher = 
minifi::processors::cwel::parseSidMatcher("(.*Sid)|UserID");
+    REQUIRE(updateXmlMetadata(xml, nullptr, nullptr, true, true, sid_matcher) 
== formatXml(nobody));
   }
 }
 
@@ -127,8 +135,8 @@ TEST_CASE("MetadataWalker will leave a Sid unchanged if it doesn't correspond to
 
   REQUIRE(updateXmlMetadata(xml, nullptr, nullptr, false, true) == 
formatXml(xml));
   REQUIRE(updateXmlMetadata(xml, nullptr, nullptr, true, true) == 
formatXml(xml));
-  auto regex = utils::Regex(".*Sid");
-  REQUIRE(updateXmlMetadata(xml, nullptr, nullptr, true, true, &regex) == 
formatXml(xml));
+  const auto sid_matcher = minifi::processors::cwel::parseSidMatcher(".*Sid");
+  REQUIRE(updateXmlMetadata(xml, nullptr, nullptr, true, true, sid_matcher) == 
formatXml(xml));
 }
 
 TEST_CASE("MetadataWalker can replace multiple Sids", "[updateXmlMetadata]") {
@@ -157,8 +165,8 @@ void extractMappingsTestHelper(const std::string &file_name,
   pugi::xml_parse_result result = doc.load_string(input_xml.c_str());
   REQUIRE(result);
 
-  auto regex = utils::Regex(".*Sid");
-  MetadataWalker walker(FakeWindowsEventLogMetadata{}, 
METADATA_WALKER_TESTS_LOG_NAME, update_xml, resolve, &regex, 
&utils::OsUtils::userIdToUsername);
+  const auto sid_matcher = minifi::processors::cwel::parseSidMatcher(".*Sid");
+  MetadataWalker walker(FakeWindowsEventLogMetadata{}, 
METADATA_WALKER_TESTS_LOG_NAME, update_xml, resolve, sid_matcher, 
&utils::OsUtils::userIdToUsername);
   doc.traverse(walker);
 
   CHECK(walker.getIdentifiers() == expected_identifiers);
@@ -204,21 +212,21 @@ TEST_CASE("MetadataWalker extracts mappings correctly when there is a single Sid
       {METADATA::SOURCE, "Microsoft-Windows-Security-Auditing"},
       {METADATA::TIME_CREATED, "event_timestamp"},
       {METADATA::EVENTID, "4672"},
-      {METADATA::OPCODE, "event_data_for_flag_4"},
+      {METADATA::OPCODE, "event_data_for_field_4"},
       {METADATA::EVENT_RECORDID, "2575952"},
       {METADATA::EVENT_TYPE, "178"},
-      {METADATA::TASK_CATEGORY, "event_data_for_flag_3"},
-      {METADATA::LEVEL, "event_data_for_flag_2"},
-      {METADATA::KEYWORDS, "event_data_for_flag_5"}};
+      {METADATA::TASK_CATEGORY, "event_data_for_field_3"},
+      {METADATA::LEVEL, "event_data_for_field_2"},
+      {METADATA::KEYWORDS, "event_data_for_field_5"}};
 
   SECTION("update_xml is false => fields are collected into 
walker.getFieldValues()") {
     const std::map<std::string, std::string> expected_field_values{
-        {"Channel", "event_data_for_flag_6"},
-        {"Keywords", "event_data_for_flag_5"},
-        {"Level", "event_data_for_flag_2"},
-        {"Opcode", "event_data_for_flag_4"},
+        {"Channel", "event_data_for_field_6"},
+        {"Keywords", "event_data_for_field_5"},
+        {"Level", "event_data_for_field_2"},
+        {"Opcode", "event_data_for_field_4"},
         {"SubjectUserSid", "Nobody"},
-        {"Task", "event_data_for_flag_3"}};
+        {"Task", "event_data_for_field_3"}};
 
     extractMappingsTestHelper(file_name, false, true, expected_identifiers, 
expected_metadata, expected_field_values);
   }
@@ -270,21 +278,21 @@ TEST_CASE("MetadataWalker extracts mappings correctly when there are multiple Si
       {METADATA::SOURCE, "Microsoft-Windows-Security-Auditing"},
       {METADATA::TIME_CREATED, "event_timestamp"},
       {METADATA::EVENTID, "4672"},
-      {METADATA::OPCODE, "event_data_for_flag_4"},
+      {METADATA::OPCODE, "event_data_for_field_4"},
       {METADATA::EVENT_RECORDID, "2575952"},
       {METADATA::EVENT_TYPE, "178"},
-      {METADATA::TASK_CATEGORY, "event_data_for_flag_3"},
-      {METADATA::LEVEL, "event_data_for_flag_2"},
-      {METADATA::KEYWORDS, "event_data_for_flag_5"}};
+      {METADATA::TASK_CATEGORY, "event_data_for_field_3"},
+      {METADATA::LEVEL, "event_data_for_field_2"},
+      {METADATA::KEYWORDS, "event_data_for_field_5"}};
 
   SECTION("update_xml is false => fields are collected into 
walker.getFieldValues()") {
     const std::map<std::string, std::string> expected_field_values{
-        {"Channel", "event_data_for_flag_6"},
-        {"Keywords", "event_data_for_flag_5"},
-        {"Level", "event_data_for_flag_2"},
-        {"Opcode", "event_data_for_flag_4"},
+        {"Channel", "event_data_for_field_6"},
+        {"Keywords", "event_data_for_field_5"},
+        {"Level", "event_data_for_field_2"},
+        {"Opcode", "event_data_for_field_4"},
         {"SubjectUserSid", "Nobody"},
-        {"Task", "event_data_for_flag_3"}};
+        {"Task", "event_data_for_field_3"}};
 
     extractMappingsTestHelper(file_name, false, true, expected_identifiers, 
expected_metadata, expected_field_values);
   }
@@ -330,20 +338,20 @@ TEST_CASE("MetadataWalker extracts mappings correctly when the Sid is unknown an
       {METADATA::SOURCE, "Microsoft-Windows-Security-Auditing"},
       {METADATA::TIME_CREATED, "event_timestamp"},
       {METADATA::EVENTID, "4672"},
-      {METADATA::OPCODE, "event_data_for_flag_4"},
+      {METADATA::OPCODE, "event_data_for_field_4"},
       {METADATA::EVENT_RECORDID, "2575952"},
       {METADATA::EVENT_TYPE, "178"},
-      {METADATA::TASK_CATEGORY, "event_data_for_flag_3"},
-      {METADATA::LEVEL, "event_data_for_flag_2"},
-      {METADATA::KEYWORDS, "event_data_for_flag_5"}};
+      {METADATA::TASK_CATEGORY, "event_data_for_field_3"},
+      {METADATA::LEVEL, "event_data_for_field_2"},
+      {METADATA::KEYWORDS, "event_data_for_field_5"}};
 
   SECTION("update_xml is false => fields are collected into 
walker.getFieldValues()") {
     const std::map<std::string, std::string> expected_field_values{
-        {"Channel", "event_data_for_flag_6"},
-        {"Keywords", "event_data_for_flag_5"},
-        {"Level", "event_data_for_flag_2"},
-        {"Opcode", "event_data_for_flag_4"},
-        {"Task", "event_data_for_flag_3"}};
+        {"Channel", "event_data_for_field_6"},
+        {"Keywords", "event_data_for_field_5"},
+        {"Level", "event_data_for_field_2"},
+        {"Opcode", "event_data_for_field_4"},
+        {"Task", "event_data_for_field_3"}};
 
     extractMappingsTestHelper(file_name, false, true, expected_identifiers, 
expected_metadata, expected_field_values);
   }
diff --git a/extensions/windows-event-log/wel/MetadataWalker.cpp b/extensions/windows-event-log/wel/MetadataWalker.cpp
index c8ba37e92..788f127b4 100644
--- a/extensions/windows-event-log/wel/MetadataWalker.cpp
+++ b/extensions/windows-event-log/wel/MetadataWalker.cpp
@@ -20,7 +20,6 @@
 
 #include <map>
 #include <functional>
-#include <codecvt>
 #include <regex>
 #include <string>
 #include <utility>
@@ -44,7 +43,7 @@ bool MetadataWalker::for_each(pugi::xml_node &node) {
     return input;
   };
   for (pugi::xml_attribute attr : node.attributes())  {
-    if (regex_ && utils::regexMatch(attr.name(), *regex_)) {
+    if (sid_matcher_(attr.name())) {
       updateAttributeValue(attr, attr.name(), idUpdate);
     }
   }
@@ -52,11 +51,11 @@ bool MetadataWalker::for_each(pugi::xml_node &node) {
   const std::string node_name = node.name();
   if (node_name == "Data") {
     for (pugi::xml_attribute attr : node.attributes())  {
-      if (regex_ && utils::regexMatch(attr.name(), *regex_)) {
+      if (sid_matcher_(attr.name())) {
         updateText(node, attr.name(), idUpdate);
       }
 
-      if (regex_ && utils::regexMatch(attr.value(), *regex_)) {
+      if (sid_matcher_(attr.value())) {
         updateText(node, attr.value(), idUpdate);
       }
     }
@@ -90,7 +89,7 @@ bool MetadataWalker::for_each(pugi::xml_node &node) {
     if (it != formatFlagMap.end()) {
       std::function<std::string(const std::string &)> updateFunc = [&](const 
std::string &input) -> std::string {
         if (resolve_) {
-          auto resolved = windows_event_log_metadata_.getEventData(it->second);
+          auto resolved = windows_event_log_metadata_.getEventData(it->second, 
input);
           if (!resolved.empty()) {
             return resolved;
           }
diff --git a/extensions/windows-event-log/wel/MetadataWalker.h b/extensions/windows-event-log/wel/MetadataWalker.h
index 9826f09e2..9a8080f4d 100644
--- a/extensions/windows-event-log/wel/MetadataWalker.h
+++ b/extensions/windows-event-log/wel/MetadataWalker.h
@@ -49,11 +49,12 @@ namespace org::apache::nifi::minifi::wel {
  */
 class MetadataWalker : public pugi::xml_tree_walker {
  public:
-  MetadataWalker(const WindowsEventLogMetadata& windows_event_log_metadata, 
std::string log_name, bool update_xml, bool resolve, utils::Regex const* regex,
+  MetadataWalker(const WindowsEventLogMetadata& windows_event_log_metadata, 
std::string log_name, bool update_xml, bool resolve,
+      std::function<bool(std::string_view)> sid_matcher,
       std::function<std::string(std::string)> user_id_to_username_fn)
       : windows_event_log_metadata_(windows_event_log_metadata),
         log_name_(std::move(log_name)),
-        regex_(regex),
+        sid_matcher_(std::move(sid_matcher)),
         update_xml_(update_xml),
         resolve_(resolve),
         user_id_to_username_fn_(std::move(user_id_to_username_fn)) {
@@ -82,8 +83,6 @@ class MetadataWalker : public pugi::xml_tree_walker {
     return "N/A";
   }
 
-  static std::string to_string(const wchar_t* pChar);
-
   /**
    * Updates text within the XML representation
    */
@@ -96,7 +95,7 @@ class MetadataWalker : public pugi::xml_tree_walker {
 
   const WindowsEventLogMetadata& windows_event_log_metadata_;
   const std::string log_name_;
-  utils::Regex const * const regex_;
+  std::function<bool(std::string_view)> sid_matcher_;
   const bool update_xml_;
   const bool resolve_;
   std::function<std::string(const std::string&)> user_id_to_username_fn_;
diff --git a/extensions/windows-event-log/wel/WindowsEventLog.cpp b/extensions/windows-event-log/wel/WindowsEventLog.cpp
index 0140ee707..585333e99 100644
--- a/extensions/windows-event-log/wel/WindowsEventLog.cpp
+++ b/extensions/windows-event-log/wel/WindowsEventLog.cpp
@@ -42,6 +42,18 @@ std::string getEventTimestampStr(uint64_t event_timestamp) {
 }
 }  // namespace
 
+std::optional<std::string> EventDataCache::get(EVT_FORMAT_MESSAGE_FLAGS field, 
const std::string& key) const {
+    std::lock_guard<std::mutex> lock{mutex_};
+    const auto it = cache_.find(CacheKey{field, key});
+    if (it != cache_.end() && it->second.expiry > 
std::chrono::system_clock::now()) { return it->second.value; }
+    return std::nullopt;
+}
+
+void EventDataCache::set(EVT_FORMAT_MESSAGE_FLAGS field, const std::string& 
key, std::string value) {
+  std::lock_guard<std::mutex> lock{mutex_};
+  cache_.insert_or_assign(CacheKey{field, key}, CacheItem{std::move(value), 
std::chrono::system_clock::now() + lifetime_});
+}
+
 void WindowsEventLogMetadataImpl::renderMetadata() {
   DWORD status = ERROR_SUCCESS;
   EVT_VARIANT stackBuffer[4096];
@@ -108,38 +120,48 @@ void WindowsEventLogMetadataImpl::renderMetadata() {
   }
 }
 
-std::string WindowsEventLogMetadataImpl::getEventData(EVT_FORMAT_MESSAGE_FLAGS 
flags) const {
+std::string WindowsEventLogMetadataImpl::getEventData(EVT_FORMAT_MESSAGE_FLAGS 
field, const std::string& key) const {
+  return metadata_ptr_.getEventData(field, key, event_ptr_);
+}
+
+std::string WindowsEventLogHandler::getEventData(EVT_FORMAT_MESSAGE_FLAGS 
field, const std::string& key, EVT_HANDLE event_ptr) const {
+  auto cached_value = event_data_cache_.get(field, key);
+  if (cached_value) { return *cached_value; }
+  auto new_value = getEventDataImpl(field, event_ptr);
+  event_data_cache_.set(field, key, new_value);
+  return new_value;
+}
+
+std::string WindowsEventLogHandler::getEventDataImpl(EVT_FORMAT_MESSAGE_FLAGS 
field, EVT_HANDLE event_ptr) const {
   WCHAR stack_buffer[4096];
   DWORD num_chars_in_buffer = sizeof(stack_buffer) / sizeof(stack_buffer[0]);
   using Deleter = utils::StackAwareDeleter<WCHAR, utils::FreeDeleter>;
   std::unique_ptr<WCHAR, Deleter> buffer{stack_buffer, Deleter{stack_buffer}};
   DWORD num_chars_used = 0;
 
-  std::string event_data;
-
-  if (!metadata_ptr_ || !event_ptr_) {
-    return event_data;
+  if (!metadata_provider_ || !event_ptr) {
+    return {};
   }
 
-  if (!EvtFormatMessage(metadata_ptr_, event_ptr_, 0, 0, nullptr, flags, 
num_chars_in_buffer, buffer.get(), &num_chars_used)) {
+  if (!EvtFormatMessage(metadata_provider_.get(), event_ptr, 0, 0, nullptr, 
field, num_chars_in_buffer, buffer.get(), &num_chars_used)) {
     auto last_error = GetLastError();
     if (ERROR_INSUFFICIENT_BUFFER == last_error) {
       num_chars_in_buffer = num_chars_used;
 
       buffer.reset((LPWSTR) malloc(num_chars_in_buffer * sizeof(WCHAR)));
       if (!buffer) {
-        return event_data;
+        return {};
       }
 
-      EvtFormatMessage(metadata_ptr_, event_ptr_, 0, 0, nullptr, flags, 
num_chars_in_buffer, buffer.get(), &num_chars_used);
+      EvtFormatMessage(metadata_provider_.get(), event_ptr, 0, 0, nullptr, 
field, num_chars_in_buffer, buffer.get(), &num_chars_used);
     }
   }
 
   if (num_chars_used == 0) {
-    return event_data;
+    return {};
   }
 
-  if (EvtFormatMessageKeyword == flags) {
+  if (EvtFormatMessageKeyword == field) {
     buffer.get()[num_chars_used - 1] = L'\0';
   }
   return utils::to_string(std::wstring{buffer.get()});
@@ -203,8 +225,4 @@ std::string WindowsEventLogHeader::createDefaultDelimiter(size_t length) const {
   }
 }
 
-EVT_HANDLE WindowsEventLogHandler::getMetadata() const {
-  return metadata_provider_.get();
-}
-
 }  // namespace org::apache::nifi::minifi::wel
diff --git a/extensions/windows-event-log/wel/WindowsEventLog.h b/extensions/windows-event-log/wel/WindowsEventLog.h
index 5a2029f2d..7bfae03d2 100644
--- a/extensions/windows-event-log/wel/WindowsEventLog.h
+++ b/extensions/windows-event-log/wel/WindowsEventLog.h
@@ -58,11 +58,36 @@ enum METADATA {
   COMPUTER,
   UNKNOWN
 };
+using METADATA_NAMES = std::vector<std::pair<METADATA, std::string>>;
 
+class EventDataCache {
+ public:
+  explicit EventDataCache(std::chrono::milliseconds lifetime = 
std::chrono::hours{24})
+      : lifetime_(lifetime) {}
+  [[nodiscard]] std::optional<std::string> get(EVT_FORMAT_MESSAGE_FLAGS field, 
const std::string& key) const;
+  void set(EVT_FORMAT_MESSAGE_FLAGS field, const std::string& key, std::string 
value);
 
-// this is a continuous enum, so we can rely on the array
-
-using METADATA_NAMES = std::vector<std::pair<METADATA, std::string>>;
+ private:
+  struct CacheKey {
+    EVT_FORMAT_MESSAGE_FLAGS field;
+    std::string key;
+
+    [[nodiscard]] bool operator==(const CacheKey&) const noexcept = default;
+  };
+  struct CacheKeyHash {
+    [[nodiscard]] size_t operator()(const CacheKey& cache_key) const noexcept {
+      return 
utils::hash_combine(std::hash<EVT_FORMAT_MESSAGE_FLAGS>{}(cache_key.field), 
std::hash<std::string>{}(cache_key.key));
+    }
+  };
+  struct CacheItem {
+    std::string value;
+    std::chrono::system_clock::time_point expiry;
+  };
+
+  mutable std::mutex mutex_;
+  std::chrono::milliseconds lifetime_;
+  std::unordered_map<CacheKey, CacheItem, CacheKeyHash> cache_;
+};
 
 class WindowsEventLogHandler {
  public:
@@ -74,16 +99,19 @@ class WindowsEventLogHandler {
 
   nonstd::expected<std::string, std::error_code> getEventMessage(EVT_HANDLE 
eventHandle) const;
 
-  [[nodiscard]] EVT_HANDLE getMetadata() const;
+  [[nodiscard]] std::string getEventData(EVT_FORMAT_MESSAGE_FLAGS field, const 
std::string& key, EVT_HANDLE event_ptr) const;
 
  private:
+  [[nodiscard]] std::string getEventDataImpl(EVT_FORMAT_MESSAGE_FLAGS field, 
EVT_HANDLE event_ptr) const;
+
   unique_evt_handle metadata_provider_;
+  mutable EventDataCache event_data_cache_;
 };
 
 class WindowsEventLogMetadata {
  public:
   virtual ~WindowsEventLogMetadata() = default;
-  [[nodiscard]] virtual std::string getEventData(EVT_FORMAT_MESSAGE_FLAGS 
flags) const = 0;
+  [[nodiscard]] virtual std::string getEventData(EVT_FORMAT_MESSAGE_FLAGS 
field, const std::string& key) const = 0;
   [[nodiscard]] virtual std::string getEventTimestamp() const = 0;
   virtual short getEventTypeIndex() const = 0;  // NOLINT short comes from 
WINDOWS API
 
@@ -147,11 +175,11 @@ class WindowsEventLogMetadata {
 
 class WindowsEventLogMetadataImpl : public WindowsEventLogMetadata {
  public:
-  WindowsEventLogMetadataImpl(EVT_HANDLE metadataProvider, EVT_HANDLE 
event_ptr) : metadata_ptr_(metadataProvider), event_ptr_(event_ptr) {
+  WindowsEventLogMetadataImpl(const WindowsEventLogHandler& metadataProvider, 
EVT_HANDLE event_ptr) : metadata_ptr_(metadataProvider), event_ptr_(event_ptr) {
     renderMetadata();
   }
 
-  [[nodiscard]] std::string getEventData(EVT_FORMAT_MESSAGE_FLAGS flags) const 
override;
+  [[nodiscard]] std::string getEventData(EVT_FORMAT_MESSAGE_FLAGS field, const 
std::string& key) const override;
 
   [[nodiscard]] std::string getEventTimestamp() const override { return 
event_timestamp_str_; }
 
@@ -164,7 +192,7 @@ class WindowsEventLogMetadataImpl : public WindowsEventLogMetadata {
   short event_type_index_ = 0;  // NOLINT short comes from WINDOWS API
   std::string event_timestamp_str_;
   EVT_HANDLE event_ptr_;
-  EVT_HANDLE metadata_ptr_;
+  const WindowsEventLogHandler& metadata_ptr_;
 };
 
 class WindowsEventLogHeader {

Reply via email to