lordgamez commented on a change in pull request #975:
URL: https://github.com/apache/nifi-minifi-cpp/pull/975#discussion_r563670164



##########
File path: extensions/aws/processors/ListS3.cpp
##########
@@ -0,0 +1,294 @@
+/**
+ * @file ListS3.cpp
+ * ListS3 class implementation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ListS3.h"
+
+#include <tuple>
+#include <algorithm>
+#include <set>
+#include <utility>
+#include <memory>
+
+#include "utils/StringUtils.h"
+
+namespace org {
+namespace apache {
+namespace nifi {
+namespace minifi {
+namespace aws {
+namespace processors {
+
+const std::string ListS3::LATEST_LISTED_KEY_PREFIX = "listed_key.";
+const std::string ListS3::LATEST_LISTED_KEY_TIMESTAMP = "listed_key.timestamp";
+
+const core::Property ListS3::Delimiter(
+  core::PropertyBuilder::createProperty("Delimiter")
+    ->withDescription("The string used to delimit directories within the 
bucket. Please consult the AWS documentation for the correct use of this 
field.")
+    ->build());
+const core::Property ListS3::Prefix(
+  core::PropertyBuilder::createProperty("Prefix")
+    ->withDescription("The prefix used to filter the object list. In most 
cases, it should end with a forward slash ('/').")
+    ->build());
+const core::Property ListS3::UseVersions(
+  core::PropertyBuilder::createProperty("Use Versions")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("Specifies whether to use S3 versions, if applicable. If 
false, only the latest version of each object will be returned.")
+    ->build());
+const core::Property ListS3::MinimumObjectAge(
+  core::PropertyBuilder::createProperty("Minimum Object Age")
+    ->isRequired(true)
+    ->withDefaultValue<core::TimePeriodValue>("0 sec")
+    ->withDescription("The minimum age that an S3 object must be in order to 
be considered; any object younger than this amount of time (according to last 
modification date) will be ignored.")
+    ->build());
+const core::Property ListS3::WriteObjectTags(
+  core::PropertyBuilder::createProperty("Write Object Tags")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("If set to 'true', the tags associated with the S3 
object will be written as FlowFile attributes.")
+    ->build());
+const core::Property ListS3::WriteUserMetadata(
+  core::PropertyBuilder::createProperty("Write User Metadata")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("If set to 'true', the user defined metadata associated 
with the S3 object will be added to FlowFile attributes/records.")
+    ->build());
+const core::Property ListS3::RequesterPays(
+  core::PropertyBuilder::createProperty("Requester Pays")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("If true, indicates that the requester consents to pay 
any charges associated with listing the S3 bucket. This sets the 
'x-amz-request-payer' header to 'requester'. "
+                      "Note that this setting is only used if Write User 
Metadata is true.")
+    ->build());
+
+const core::Relationship ListS3::Success("success", "FlowFiles are routed to 
success relationship");
+
+void ListS3::initialize() {
+  // Add new supported properties
+  updateSupportedProperties({Delimiter, Prefix, UseVersions, MinimumObjectAge, 
WriteObjectTags, WriteUserMetadata, RequesterPays});
+  // Set the supported relationships
+  setSupportedRelationships({Success});
+}
+
+void ListS3::onSchedule(const std::shared_ptr<core::ProcessContext> &context, 
const std::shared_ptr<core::ProcessSessionFactory> &sessionFactory) {
+  S3Processor::onSchedule(context, sessionFactory);
+
+  state_manager_ = context->getStateManager();
+  if (state_manager_ == nullptr) {
+    throw Exception(PROCESSOR_EXCEPTION, "Failed to get StateManager");
+  }
+
+  auto common_properties = getCommonELSupportedProperties(context, nullptr);
+  if (!common_properties) {
+    throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Required property is not set 
or invalid");
+  }
+  configureS3Wrapper(common_properties.value());
+  list_request_params_.bucket = common_properties->bucket;
+
+  context->getProperty(Delimiter.getName(), list_request_params_.delimiter);
+  logger_->log_debug("ListS3: Delimiter [%s]", list_request_params_.delimiter);
+
+  context->getProperty(Prefix.getName(), list_request_params_.prefix);
+  logger_->log_debug("ListS3: Prefix [%s]", list_request_params_.prefix);
+
+  context->getProperty(UseVersions.getName(), 
list_request_params_.use_versions);
+  logger_->log_debug("ListS3: UseVersions [%s]", 
list_request_params_.use_versions ? "true" : "false");
+
+  std::string min_obj_age_str;
+  if (!context->getProperty(MinimumObjectAge.getName(), min_obj_age_str) || 
min_obj_age_str.empty() || 
!core::Property::getTimeMSFromString(min_obj_age_str, 
list_request_params_.min_object_age)) {
+    throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Minimum Object Age missing or 
invalid");
+  }
+  logger_->log_debug("S3Processor: Minimum Object Age [%llud]", 
min_obj_age_str, list_request_params_.min_object_age);
+
+  context->getProperty(WriteObjectTags.getName(), write_object_tags_);
+  logger_->log_debug("ListS3: WriteObjectTags [%s]", write_object_tags_ ? 
"true" : "false");
+
+  context->getProperty(WriteUserMetadata.getName(), write_user_metadata_);
+  logger_->log_debug("ListS3: WriteUserMetadata [%s]", write_user_metadata_ ? 
"true" : "false");
+
+  context->getProperty(RequesterPays.getName(), requester_pays_);
+  logger_->log_debug("ListS3: RequesterPays [%s]", requester_pays_ ? "true" : 
"false");
+}
+
+void ListS3::writeObjectTags(
+    const std::string &bucket,
+    const aws::s3::ListedObjectAttributes &object_attributes,
+    const std::shared_ptr<core::ProcessSession> &session,
+    const std::shared_ptr<core::FlowFile> &flow_file) {
+  if (!write_object_tags_) {
+    return;
+  }
+
+  auto get_object_tags_result = s3_wrapper_.getObjectTags(bucket, 
object_attributes.filename, object_attributes.version);
+  if (get_object_tags_result) {
+    for (const auto& tag : get_object_tags_result.value()) {
+      session->putAttribute(flow_file, "s3.tag." + tag.first, tag.second);
+    }
+  } else {
+    logger_->log_warn("Failed to get object tags for object %s in bucket %s", 
object_attributes.filename, bucket);
+  }
+}
+
+void ListS3::writeUserMetadata(
+    const aws::s3::ListedObjectAttributes &object_attributes,
+    const std::shared_ptr<core::ProcessSession> &session,
+    const std::shared_ptr<core::FlowFile> &flow_file) {
+  if (!write_user_metadata_) {
+    return;
+  }
+
+  aws::s3::HeadObjectRequestParameters params;
+  params.bucket = list_request_params_.bucket;
+  params.object_key = object_attributes.filename;
+  params.version = object_attributes.version;
+  params.requester_pays = requester_pays_;
+  auto head_object_tags_result = s3_wrapper_.headObject(params);
+  if (head_object_tags_result) {
+    for (const auto& metadata : head_object_tags_result->user_metadata_map) {
+      session->putAttribute(flow_file, "s3.user.metadata." + metadata.first, 
metadata.second);
+    }
+  } else {
+    logger_->log_warn("Failed to get object metadata for object %s in bucket 
%s", params.object_key, params.bucket);
+  }
+}
+
+std::vector<std::string> ListS3::getLatestListedKeys(const 
std::unordered_map<std::string, std::string> &state) {
+  std::vector<std::string> latest_listed_keys;
+  for (const auto& kvp : state) {
+    if (kvp.first.rfind(LATEST_LISTED_KEY_PREFIX, 0) == 0) {
+      latest_listed_keys.push_back(kvp.second);
+    }
+  }
+  return latest_listed_keys;
+}
+
+uint64_t ListS3::getLatestListedKeyTimestamp(const 
std::unordered_map<std::string, std::string> &state) {
+  std::string stored_listed_key_timestamp_str;
+  auto it = state.find(LATEST_LISTED_KEY_TIMESTAMP);
+  if (it != state.end()) {
+    stored_listed_key_timestamp_str = it->second;
+  }
+
+  int64_t stored_listed_key_timestamp = 0;
+  core::Property::StringToInt(stored_listed_key_timestamp_str, 
stored_listed_key_timestamp);
+
+  return stored_listed_key_timestamp;
+}
+
+ListS3::ListingState ListS3::getCurrentState(const 
std::shared_ptr<core::ProcessContext> &context) {
+  ListS3::ListingState current_listing_state;
+  std::unordered_map<std::string, std::string> state;
+  if (!state_manager_->get(state)) {
+    logger_->log_info("No stored state for listed objects was found");
+    return current_listing_state;
+  }
+
+  current_listing_state.listed_key_timestamp = 
getLatestListedKeyTimestamp(state);
+  logger_->log_debug("Restored previous listed timestamp %lld", 
current_listing_state.listed_key_timestamp);
+
+  current_listing_state.listed_keys = getLatestListedKeys(state);
+  return current_listing_state;
+}
+
+void ListS3::storeState(const std::shared_ptr<core::ProcessContext> &context, 
const ListS3::ListingState &latest_listing_state) {
+  std::unordered_map<std::string, std::string> state;
+  state[LATEST_LISTED_KEY_TIMESTAMP] = 
std::to_string(latest_listing_state.listed_key_timestamp);
+  for (std::size_t i = 0; i < latest_listing_state.listed_keys.size(); ++i) {
+    state[LATEST_LISTED_KEY_PREFIX + std::to_string(i)] = 
latest_listing_state.listed_keys.at(i);
+  }
+  logger_->log_debug("Stored new listed timestamp %lld", 
latest_listing_state.listed_key_timestamp);
+  state_manager_->set(state);
+}
+
+void ListS3::createNewFlowFile(
+    const std::shared_ptr<core::ProcessSession> &session,
+    const aws::s3::ListedObjectAttributes &object_attributes) {
+  auto flow_file = session->create();
+  session->putAttribute(flow_file, "s3.bucket", list_request_params_.bucket);
+  session->putAttribute(flow_file, core::SpecialFlowAttribute::FILENAME, 
object_attributes.filename);
+  session->putAttribute(flow_file, "s3.etag", object_attributes.etag);
+  session->putAttribute(flow_file, "s3.isLatest", object_attributes.is_latest 
? "true" : "false");
+  session->putAttribute(flow_file, "s3.lastModified", 
std::to_string(object_attributes.last_modified));
+  session->putAttribute(flow_file, "s3.length", 
std::to_string(object_attributes.length));
+  session->putAttribute(flow_file, "s3.storeClass", 
object_attributes.store_class);
+  if (!object_attributes.version.empty()) {
+    session->putAttribute(flow_file, "s3.version", object_attributes.version);
+  }
+  writeObjectTags(list_request_params_.bucket, object_attributes, session, 
flow_file);
+  writeUserMetadata(object_attributes, session, flow_file);
+
+  session->transfer(flow_file, Success);
+}
+
+void ListS3::onTrigger(const std::shared_ptr<core::ProcessContext> &context, 
const std::shared_ptr<core::ProcessSession> &session) {
+  logger_->log_debug("ListS3 onTrigger");
+
+  auto aws_results = s3_wrapper_.listBucket(list_request_params_);
+  if (!aws_results) {
+    logger_->log_error("Failed to list S3 bucket %s", 
list_request_params_.bucket);
+    context->yield();
+    return;
+  }
+
+  if (aws_results->size() == 0) {
+    logger_->log_debug("No S3 object were found in bucket %s", 
list_request_params_.bucket);
+    return;

Review comment:
       I checked the NiFi implementation for reference, and it seems to yield 
when there are no new S3 objects to list. I'll update the implementation 
accordingly.

##########
File path: extensions/aws/processors/ListS3.cpp
##########
@@ -0,0 +1,294 @@
+/**
+ * @file ListS3.cpp
+ * ListS3 class implementation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ListS3.h"
+
+#include <tuple>
+#include <algorithm>
+#include <set>
+#include <utility>
+#include <memory>
+
+#include "utils/StringUtils.h"
+
+namespace org {
+namespace apache {
+namespace nifi {
+namespace minifi {
+namespace aws {
+namespace processors {
+
+const std::string ListS3::LATEST_LISTED_KEY_PREFIX = "listed_key.";
+const std::string ListS3::LATEST_LISTED_KEY_TIMESTAMP = "listed_key.timestamp";
+
+const core::Property ListS3::Delimiter(
+  core::PropertyBuilder::createProperty("Delimiter")
+    ->withDescription("The string used to delimit directories within the 
bucket. Please consult the AWS documentation for the correct use of this 
field.")
+    ->build());
+const core::Property ListS3::Prefix(
+  core::PropertyBuilder::createProperty("Prefix")
+    ->withDescription("The prefix used to filter the object list. In most 
cases, it should end with a forward slash ('/').")
+    ->build());
+const core::Property ListS3::UseVersions(
+  core::PropertyBuilder::createProperty("Use Versions")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("Specifies whether to use S3 versions, if applicable. If 
false, only the latest version of each object will be returned.")
+    ->build());
+const core::Property ListS3::MinimumObjectAge(
+  core::PropertyBuilder::createProperty("Minimum Object Age")
+    ->isRequired(true)
+    ->withDefaultValue<core::TimePeriodValue>("0 sec")
+    ->withDescription("The minimum age that an S3 object must be in order to 
be considered; any object younger than this amount of time (according to last 
modification date) will be ignored.")
+    ->build());
+const core::Property ListS3::WriteObjectTags(
+  core::PropertyBuilder::createProperty("Write Object Tags")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("If set to 'true', the tags associated with the S3 
object will be written as FlowFile attributes.")
+    ->build());
+const core::Property ListS3::WriteUserMetadata(
+  core::PropertyBuilder::createProperty("Write User Metadata")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("If set to 'true', the user defined metadata associated 
with the S3 object will be added to FlowFile attributes/records.")
+    ->build());
+const core::Property ListS3::RequesterPays(
+  core::PropertyBuilder::createProperty("Requester Pays")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("If true, indicates that the requester consents to pay 
any charges associated with listing the S3 bucket. This sets the 
'x-amz-request-payer' header to 'requester'. "
+                      "Note that this setting is only used if Write User 
Metadata is true.")
+    ->build());
+
+const core::Relationship ListS3::Success("success", "FlowFiles are routed to 
success relationship");
+
+void ListS3::initialize() {
+  // Add new supported properties
+  updateSupportedProperties({Delimiter, Prefix, UseVersions, MinimumObjectAge, 
WriteObjectTags, WriteUserMetadata, RequesterPays});
+  // Set the supported relationships
+  setSupportedRelationships({Success});
+}
+
+void ListS3::onSchedule(const std::shared_ptr<core::ProcessContext> &context, 
const std::shared_ptr<core::ProcessSessionFactory> &sessionFactory) {
+  S3Processor::onSchedule(context, sessionFactory);
+
+  state_manager_ = context->getStateManager();
+  if (state_manager_ == nullptr) {
+    throw Exception(PROCESSOR_EXCEPTION, "Failed to get StateManager");
+  }
+
+  auto common_properties = getCommonELSupportedProperties(context, nullptr);
+  if (!common_properties) {
+    throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Required property is not set 
or invalid");
+  }
+  configureS3Wrapper(common_properties.value());
+  list_request_params_.bucket = common_properties->bucket;
+
+  context->getProperty(Delimiter.getName(), list_request_params_.delimiter);
+  logger_->log_debug("ListS3: Delimiter [%s]", list_request_params_.delimiter);
+
+  context->getProperty(Prefix.getName(), list_request_params_.prefix);
+  logger_->log_debug("ListS3: Prefix [%s]", list_request_params_.prefix);
+
+  context->getProperty(UseVersions.getName(), 
list_request_params_.use_versions);
+  logger_->log_debug("ListS3: UseVersions [%s]", 
list_request_params_.use_versions ? "true" : "false");
+
+  std::string min_obj_age_str;
+  if (!context->getProperty(MinimumObjectAge.getName(), min_obj_age_str) || 
min_obj_age_str.empty() || 
!core::Property::getTimeMSFromString(min_obj_age_str, 
list_request_params_.min_object_age)) {
+    throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Minimum Object Age missing or 
invalid");
+  }
+  logger_->log_debug("S3Processor: Minimum Object Age [%llud]", 
min_obj_age_str, list_request_params_.min_object_age);
+
+  context->getProperty(WriteObjectTags.getName(), write_object_tags_);
+  logger_->log_debug("ListS3: WriteObjectTags [%s]", write_object_tags_ ? 
"true" : "false");
+
+  context->getProperty(WriteUserMetadata.getName(), write_user_metadata_);
+  logger_->log_debug("ListS3: WriteUserMetadata [%s]", write_user_metadata_ ? 
"true" : "false");
+
+  context->getProperty(RequesterPays.getName(), requester_pays_);
+  logger_->log_debug("ListS3: RequesterPays [%s]", requester_pays_ ? "true" : 
"false");
+}
+
+void ListS3::writeObjectTags(
+    const std::string &bucket,
+    const aws::s3::ListedObjectAttributes &object_attributes,
+    const std::shared_ptr<core::ProcessSession> &session,
+    const std::shared_ptr<core::FlowFile> &flow_file) {
+  if (!write_object_tags_) {
+    return;
+  }
+
+  auto get_object_tags_result = s3_wrapper_.getObjectTags(bucket, 
object_attributes.filename, object_attributes.version);
+  if (get_object_tags_result) {
+    for (const auto& tag : get_object_tags_result.value()) {
+      session->putAttribute(flow_file, "s3.tag." + tag.first, tag.second);
+    }
+  } else {
+    logger_->log_warn("Failed to get object tags for object %s in bucket %s", 
object_attributes.filename, bucket);
+  }
+}
+
+void ListS3::writeUserMetadata(
+    const aws::s3::ListedObjectAttributes &object_attributes,
+    const std::shared_ptr<core::ProcessSession> &session,
+    const std::shared_ptr<core::FlowFile> &flow_file) {
+  if (!write_user_metadata_) {
+    return;
+  }
+
+  aws::s3::HeadObjectRequestParameters params;
+  params.bucket = list_request_params_.bucket;
+  params.object_key = object_attributes.filename;
+  params.version = object_attributes.version;
+  params.requester_pays = requester_pays_;
+  auto head_object_tags_result = s3_wrapper_.headObject(params);
+  if (head_object_tags_result) {
+    for (const auto& metadata : head_object_tags_result->user_metadata_map) {
+      session->putAttribute(flow_file, "s3.user.metadata." + metadata.first, 
metadata.second);
+    }
+  } else {
+    logger_->log_warn("Failed to get object metadata for object %s in bucket 
%s", params.object_key, params.bucket);
+  }
+}
+
+std::vector<std::string> ListS3::getLatestListedKeys(const 
std::unordered_map<std::string, std::string> &state) {
+  std::vector<std::string> latest_listed_keys;
+  for (const auto& kvp : state) {
+    if (kvp.first.rfind(LATEST_LISTED_KEY_PREFIX, 0) == 0) {
+      latest_listed_keys.push_back(kvp.second);
+    }
+  }
+  return latest_listed_keys;
+}
+
+uint64_t ListS3::getLatestListedKeyTimestamp(const 
std::unordered_map<std::string, std::string> &state) {
+  std::string stored_listed_key_timestamp_str;
+  auto it = state.find(LATEST_LISTED_KEY_TIMESTAMP);
+  if (it != state.end()) {
+    stored_listed_key_timestamp_str = it->second;
+  }
+
+  int64_t stored_listed_key_timestamp = 0;
+  core::Property::StringToInt(stored_listed_key_timestamp_str, 
stored_listed_key_timestamp);
+
+  return stored_listed_key_timestamp;
+}
+
+ListS3::ListingState ListS3::getCurrentState(const 
std::shared_ptr<core::ProcessContext> &context) {
+  ListS3::ListingState current_listing_state;
+  std::unordered_map<std::string, std::string> state;
+  if (!state_manager_->get(state)) {
+    logger_->log_info("No stored state for listed objects was found");
+    return current_listing_state;
+  }
+
+  current_listing_state.listed_key_timestamp = 
getLatestListedKeyTimestamp(state);
+  logger_->log_debug("Restored previous listed timestamp %lld", 
current_listing_state.listed_key_timestamp);
+
+  current_listing_state.listed_keys = getLatestListedKeys(state);
+  return current_listing_state;
+}
+
+void ListS3::storeState(const std::shared_ptr<core::ProcessContext> &context, 
const ListS3::ListingState &latest_listing_state) {
+  std::unordered_map<std::string, std::string> state;
+  state[LATEST_LISTED_KEY_TIMESTAMP] = 
std::to_string(latest_listing_state.listed_key_timestamp);
+  for (std::size_t i = 0; i < latest_listing_state.listed_keys.size(); ++i) {
+    state[LATEST_LISTED_KEY_PREFIX + std::to_string(i)] = 
latest_listing_state.listed_keys.at(i);
+  }
+  logger_->log_debug("Stored new listed timestamp %lld", 
latest_listing_state.listed_key_timestamp);
+  state_manager_->set(state);
+}
+
+void ListS3::createNewFlowFile(
+    const std::shared_ptr<core::ProcessSession> &session,
+    const aws::s3::ListedObjectAttributes &object_attributes) {
+  auto flow_file = session->create();
+  session->putAttribute(flow_file, "s3.bucket", list_request_params_.bucket);
+  session->putAttribute(flow_file, core::SpecialFlowAttribute::FILENAME, 
object_attributes.filename);
+  session->putAttribute(flow_file, "s3.etag", object_attributes.etag);
+  session->putAttribute(flow_file, "s3.isLatest", object_attributes.is_latest 
? "true" : "false");
+  session->putAttribute(flow_file, "s3.lastModified", 
std::to_string(object_attributes.last_modified));
+  session->putAttribute(flow_file, "s3.length", 
std::to_string(object_attributes.length));
+  session->putAttribute(flow_file, "s3.storeClass", 
object_attributes.store_class);
+  if (!object_attributes.version.empty()) {
+    session->putAttribute(flow_file, "s3.version", object_attributes.version);
+  }
+  writeObjectTags(list_request_params_.bucket, object_attributes, session, 
flow_file);
+  writeUserMetadata(object_attributes, session, flow_file);
+
+  session->transfer(flow_file, Success);
+}
+
+void ListS3::onTrigger(const std::shared_ptr<core::ProcessContext> &context, 
const std::shared_ptr<core::ProcessSession> &session) {
+  logger_->log_debug("ListS3 onTrigger");
+
+  auto aws_results = s3_wrapper_.listBucket(list_request_params_);
+  if (!aws_results) {
+    logger_->log_error("Failed to list S3 bucket %s", 
list_request_params_.bucket);
+    context->yield();
+    return;
+  }
+
+  if (aws_results->size() == 0) {
+    logger_->log_debug("No S3 object were found in bucket %s", 
list_request_params_.bucket);
+    return;

Review comment:
       Updated in 
[8216738](https://github.com/apache/nifi-minifi-cpp/pull/975/commits/8216738921e0d5368e532b09aaf618fc61bcbaaf)

##########
File path: extensions/aws/s3/S3Wrapper.cpp
##########
@@ -30,46 +37,253 @@ namespace minifi {
 namespace aws {
 namespace s3 {
 
-minifi::utils::optional<Aws::S3::Model::PutObjectResult> 
S3Wrapper::sendPutObjectRequest(const Aws::S3::Model::PutObjectRequest& 
request) {
-  Aws::S3::S3Client s3_client(credentials_, client_config_);
-  auto outcome = s3_client.PutObject(request);
+void HeadObjectResult::setFilePaths(const std::string& key) {
+  absolute_path = key;
+  std::tie(path, filename) = minifi::utils::file::FileUtils::split_path(key, 
true /*force_posix*/);
+}
+
+S3Wrapper::S3Wrapper() : 
request_sender_(minifi::utils::make_unique<S3ClientRequestSender>()) {
+}
+
+S3Wrapper::S3Wrapper(std::unique_ptr<S3RequestSender> request_sender) : 
request_sender_(std::move(request_sender)) {
+}
+
+void S3Wrapper::setCredentials(const Aws::Auth::AWSCredentials& cred) {
+  request_sender_->setCredentials(cred);
+}
+
+void S3Wrapper::setRegion(const Aws::String& region) {
+  request_sender_->setRegion(region);
+}
+
+void S3Wrapper::setTimeout(uint64_t timeout) {
+  request_sender_->setTimeout(timeout);
+}
+
+void S3Wrapper::setEndpointOverrideUrl(const Aws::String& url) {
+  request_sender_->setEndpointOverrideUrl(url);
+}
+
+void S3Wrapper::setProxy(const ProxyOptions& proxy) {
+  request_sender_->setProxy(proxy);
+}
+
+void S3Wrapper::setCannedAcl(Aws::S3::Model::PutObjectRequest& request, const 
std::string& canned_acl) const {
+  if (canned_acl.empty() || CANNED_ACL_MAP.find(canned_acl) == 
CANNED_ACL_MAP.end())
+    return;
+
+  logger_->log_debug("Setting AWS canned ACL [%s]", canned_acl);
+  request.SetACL(CANNED_ACL_MAP.at(canned_acl));
+}
+
+Expiration S3Wrapper::getExpiration(const std::string& expiration) {
+  minifi::utils::Regex expr("expiry-date=\"(.*)\", rule-id=\"(.*)\"");
+  const auto match = expr.match(expiration);
+  const auto& results = expr.getResult();
+  if (!match || results.size() < 3)
+    return Expiration{};
+  return Expiration{results[1], results[2]};
+}
+
+std::string 
S3Wrapper::getEncryptionString(Aws::S3::Model::ServerSideEncryption encryption) 
{
+  if (encryption == Aws::S3::Model::ServerSideEncryption::NOT_SET) {
+    return "";
+  }
+
+  auto it = std::find_if(SERVER_SIDE_ENCRYPTION_MAP.begin(), 
SERVER_SIDE_ENCRYPTION_MAP.end(),
+    [&](const std::pair<std::string, const 
Aws::S3::Model::ServerSideEncryption&> pair) {
+      return pair.second == encryption;
+    });
+  if (it != SERVER_SIDE_ENCRYPTION_MAP.end()) {
+    return it->first;
+  }
+  return "";
+}
+
+minifi::utils::optional<PutObjectResult> S3Wrapper::putObject(const 
PutObjectRequestParameters& put_object_params, std::shared_ptr<Aws::IOStream> 
data_stream) {
+  Aws::S3::Model::PutObjectRequest request;
+  request.SetBucket(put_object_params.bucket);
+  request.SetKey(put_object_params.object_key);
+  
request.SetStorageClass(STORAGE_CLASS_MAP.at(put_object_params.storage_class));
+  
request.SetServerSideEncryption(SERVER_SIDE_ENCRYPTION_MAP.at(put_object_params.server_side_encryption));
+  request.SetContentType(put_object_params.content_type);
+  request.SetMetadata(put_object_params.user_metadata_map);
+  request.SetBody(data_stream);
+  request.SetGrantFullControl(put_object_params.fullcontrol_user_list);
+  request.SetGrantRead(put_object_params.read_permission_user_list);
+  request.SetGrantReadACP(put_object_params.read_acl_user_list);
+  request.SetGrantWriteACP(put_object_params.write_acl_user_list);
+  setCannedAcl(request, put_object_params.canned_acl);
+
+  auto aws_result = request_sender_->sendPutObjectRequest(request);
+  if (!aws_result) {
+    return minifi::utils::nullopt;
+  }
+
+  PutObjectResult result;
+  // Etags are returned by AWS in quoted form that should be removed
+  result.etag = 
minifi::utils::StringUtils::removeFramingCharacters(aws_result->GetETag(), '"');
+  result.version = aws_result->GetVersionId();
+
+  // GetExpiration returns a string pair with a date and a ruleid in 
'expiry-date=\"<DATE>\", rule-id=\"<RULEID>\"' format
+  // s3.expiration only needs the date member of this pair
+  result.expiration = 
getExpiration(aws_result->GetExpiration()).expiration_time;
+  result.ssealgorithm = 
getEncryptionString(aws_result->GetServerSideEncryption());
+  return result;
+}
+
+bool S3Wrapper::deleteObject(const std::string& bucket, const std::string& 
object_key, const std::string& version) {
+  Aws::S3::Model::DeleteObjectRequest request;
+  request.SetBucket(bucket);
+  request.SetKey(object_key);
+  if (!version.empty()) {
+    request.SetVersionId(version);
+  }
+  return request_sender_->sendDeleteObjectRequest(request);
+}
+
+int64_t S3Wrapper::writeFetchedBody(Aws::IOStream& source, const int64_t 
data_size, const std::shared_ptr<io::BaseStream>& output) {
+  static const uint64_t BUFFER_SIZE = 4096;
+  std::vector<uint8_t> buffer;
+  buffer.reserve(BUFFER_SIZE);
 
-  if (outcome.IsSuccess()) {
-      logger_->log_info("Added S3 object '%s' to bucket '%s'", 
request.GetKey(), request.GetBucket());
-      return outcome.GetResultWithOwnership();
-  } else {
-    logger_->log_error("PutS3Object failed with the following: '%s'", 
outcome.GetError().GetMessage());
+  int64_t write_size = 0;
+  while (write_size < data_size) {
+    auto next_write_size = data_size - write_size < BUFFER_SIZE ? data_size - 
write_size : BUFFER_SIZE;
+    if (!source.read(reinterpret_cast<char*>(buffer.data()), next_write_size)) 
{

Review comment:
       Even though I managed to make the `readsome` method work in the unit 
tests, it seems to fail in the docker tests: while reading from the stream, no 
data is returned and the callback's `process` method fails. The definition of 
`readsome` says that the function "Extracts up to count **immediately 
available** characters from the input stream." and notes that "The behavior of 
this function is **highly implementation-specific**.", which makes it seem 
unreliable. In other words, as others have noted, the function only returns 
characters that are immediately available in the underlying buffer. Because of 
this, I would prefer to stay with the `read` function in this case.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to