martinzink commented on a change in pull request #1044: URL: https://github.com/apache/nifi-minifi-cpp/pull/1044#discussion_r632507305
########## File path: extensions/systemd/ConsumeJournald.cpp ########## @@ -0,0 +1,262 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ConsumeJournald.h" + +#include <algorithm> + +#include <date/date.h> +#include "spdlog/spdlog.h" // TODO(szaszm): make fmt directly available +#include "utils/GeneralUtils.h" + +namespace org { namespace apache { namespace nifi { namespace minifi { namespace extensions { namespace systemd { + +constexpr const char* ConsumeJournald::CURSOR_KEY; +const core::Relationship ConsumeJournald::Success("success", "Successfully consumed journal messages."); + +const core::Property ConsumeJournald::BatchSize = core::PropertyBuilder::createProperty("Batch Size") + ->withDescription("The maximum number of entries processed in a single execution.") + ->withDefaultValue<size_t>(1000) + ->isRequired(true) + ->build(); + +const core::Property ConsumeJournald::PayloadFormat = core::PropertyBuilder::createProperty("Payload Format") + ->withDescription("Configures flow file content formatting. Raw: only the message. 
Syslog: similar to syslog or journalctl output.") + ->withDefaultValue<std::string>(PAYLOAD_FORMAT_SYSLOG) + ->withAllowableValues<std::string>({PAYLOAD_FORMAT_RAW, PAYLOAD_FORMAT_SYSLOG}) + ->isRequired(true) + ->build(); + +const core::Property ConsumeJournald::IncludeTimestamp = core::PropertyBuilder::createProperty("Include Timestamp") + ->withDescription("Include message timestamp in the 'timestamp' attribute.") + ->withDefaultValue<bool>(true) + ->isRequired(true) + ->build(); + +const core::Property ConsumeJournald::JournalType = core::PropertyBuilder::createProperty("Journal Type") + ->withDescription("Type of journal to consume.") + ->withDefaultValue<std::string>(JOURNAL_TYPE_SYSTEM) + ->withAllowableValues<std::string>({JOURNAL_TYPE_USER, JOURNAL_TYPE_SYSTEM, JOURNAL_TYPE_BOTH}) + ->isRequired(true) + ->build(); + +const core::Property ConsumeJournald::ProcessOldMessages = core::PropertyBuilder::createProperty("Process Old Messages") + ->withDescription("Process events created before the first usage (schedule) of the processor instance.") + ->withDefaultValue<bool>(false) + ->isRequired(true) + ->build(); + +const core::Property ConsumeJournald::TimestampFormat = core::PropertyBuilder::createProperty("Timestamp Format") + ->withDescription("Format string to use when creating the timestamp attribute or writing messages in the syslog format.") + ->withDefaultValue("%x %X %Z") + ->isRequired(true) + ->build(); + +ConsumeJournald::ConsumeJournald(const std::string &name, const utils::Identifier &id, std::unique_ptr<libwrapper::LibWrapper>&& libwrapper) + :core::Processor{name, id}, libwrapper_{std::move(libwrapper)} +{} + +void ConsumeJournald::initialize() { + setSupportedProperties({BatchSize, PayloadFormat, IncludeTimestamp, JournalType, ProcessOldMessages, TimestampFormat}); + setSupportedRelationships({Success}); + + worker_ = utils::make_unique<Worker>(); +} + +void ConsumeJournald::notifyStop() { + bool running = true; + if 
(!running_.compare_exchange_strong(running, false, std::memory_order_acq_rel) || !journal_) return; + worker_->enqueue([this] { + journal_ = nullptr; + }).get(); + worker_ = nullptr; +} + +void ConsumeJournald::onSchedule(core::ProcessContext* const context, core::ProcessSessionFactory* const sessionFactory) { + gsl_Expects(context && sessionFactory && !running_ && worker_); + using JournalTypeEnum = systemd::JournalType; + + const auto parse_payload_format = [](const std::string& property_value) -> utils::optional<systemd::PayloadFormat> { + if (utils::StringUtils::equalsIgnoreCase(property_value, PAYLOAD_FORMAT_RAW)) return systemd::PayloadFormat::Raw; + if (utils::StringUtils::equalsIgnoreCase(property_value, PAYLOAD_FORMAT_SYSLOG)) return systemd::PayloadFormat::Syslog; + return utils::nullopt; + }; + const auto parse_journal_type = [](const std::string& property_value) -> utils::optional<JournalTypeEnum> { + if (utils::StringUtils::equalsIgnoreCase(property_value, JOURNAL_TYPE_USER)) return JournalTypeEnum::User; + if (utils::StringUtils::equalsIgnoreCase(property_value, JOURNAL_TYPE_SYSTEM)) return JournalTypeEnum::System; + if (utils::StringUtils::equalsIgnoreCase(property_value, JOURNAL_TYPE_BOTH)) return JournalTypeEnum::Both; + return utils::nullopt; + }; + batch_size_ = context->getProperty<size_t>(BatchSize).value_or(10); + payload_format_ = (context->getProperty(PayloadFormat) | utils::flatMap(parse_payload_format)).value_or(systemd::PayloadFormat::Syslog); + include_timestamp_ = context->getProperty<bool>(IncludeTimestamp).value_or(true); + const auto journal_type = (context->getProperty(JournalType) | utils::flatMap(parse_journal_type)).value_or(JournalTypeEnum::System); + const auto process_old_messages = context->getProperty<bool>(ProcessOldMessages).value_or(false); + timestamp_format_ = [&context] { + auto tf_prop = context->getProperty(TimestampFormat).value_or(TimestampFormat.getDefaultValue()); + if (tf_prop == "ISO" || tf_prop == "ISO 8601" 
|| tf_prop == "ISO8601") return std::string{"%FT%T%Ez"}; + return tf_prop; + }(); + + state_manager_ = context->getStateManager(); + // All journal-related API calls are thread-agnostic, meaning they need to be called from the same thread. In our environment, + // where a processor can easily be scheduled on different threads, we ensure this by executing all library calls on a dedicated + // worker thread. This is why all such operations are dispatched to a thread and immediately waited for in the initiating thread. + journal_ = worker_->enqueue([this, journal_type]{ return libwrapper_->openJournal(journal_type); }).get(); + const auto seek_default = [this, process_old_messages](libwrapper::Journal& journal) { + if (process_old_messages) journal.seekHead(); + else journal.seekTail(); + state_manager_->set({{"cursor", getCursor()}}); Review comment: When I tried to run the processor locally (on both Ubuntu 18.04 and Manjaro), onSchedule failed because this getCursor call returned an error: `sd_journal_get_cursor: Cannot assign requested address`. From the man page, I think we are missing an sd_journal_next call (when I hacked it in here, it succeeded and the processor works fine). From the man page: ``` Note that sd_journal_get_cursor() will not work before sd_journal_next(3) (or related call) has been called at least once, in order to position the read pointer at a valid entry. ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org