lordgamez commented on a change in pull request #975:
URL: https://github.com/apache/nifi-minifi-cpp/pull/975#discussion_r562005670



##########
File path: extensions/aws/processors/ListS3.cpp
##########
@@ -0,0 +1,294 @@
+/**
+ * @file ListS3.cpp
+ * ListS3 class implementation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ListS3.h"
+
+#include <tuple>
+#include <algorithm>
+#include <set>
+#include <utility>
+#include <memory>
+
+#include "utils/StringUtils.h"
+
+namespace org {
+namespace apache {
+namespace nifi {
+namespace minifi {
+namespace aws {
+namespace processors {
+
+const std::string ListS3::LATEST_LISTED_KEY_PREFIX = "listed_key.";
+const std::string ListS3::LATEST_LISTED_KEY_TIMESTAMP = "listed_key.timestamp";
+
+const core::Property ListS3::Delimiter(
+  core::PropertyBuilder::createProperty("Delimiter")
+    ->withDescription("The string used to delimit directories within the 
bucket. Please consult the AWS documentation for the correct use of this 
field.")
+    ->build());
+const core::Property ListS3::Prefix(
+  core::PropertyBuilder::createProperty("Prefix")
+    ->withDescription("The prefix used to filter the object list. In most 
cases, it should end with a forward slash ('/').")
+    ->build());
+const core::Property ListS3::UseVersions(
+  core::PropertyBuilder::createProperty("Use Versions")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("Specifies whether to use S3 versions, if applicable. If 
false, only the latest version of each object will be returned.")
+    ->build());
+const core::Property ListS3::MinimumObjectAge(
+  core::PropertyBuilder::createProperty("Minimum Object Age")
+    ->isRequired(true)
+    ->withDefaultValue<core::TimePeriodValue>("0 sec")
+    ->withDescription("The minimum age that an S3 object must be in order to 
be considered; any object younger than this amount of time (according to last 
modification date) will be ignored.")
+    ->build());
+const core::Property ListS3::WriteObjectTags(
+  core::PropertyBuilder::createProperty("Write Object Tags")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("If set to 'true', the tags associated with the S3 
object will be written as FlowFile attributes.")
+    ->build());
+const core::Property ListS3::WriteUserMetadata(
+  core::PropertyBuilder::createProperty("Write User Metadata")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("If set to 'true', the user defined metadata associated 
with the S3 object will be added to FlowFile attributes/records.")
+    ->build());
+const core::Property ListS3::RequesterPays(
+  core::PropertyBuilder::createProperty("Requester Pays")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("If true, indicates that the requester consents to pay 
any charges associated with listing the S3 bucket. This sets the 
'x-amz-request-payer' header to 'requester'. "
+                      "Note that this setting is only used if Write User 
Metadata is true.")
+    ->build());
+
+const core::Relationship ListS3::Success("success", "FlowFiles are routed to 
success relationship");
+
+void ListS3::initialize() {
+  // Add new supported properties
+  updateSupportedProperties({Delimiter, Prefix, UseVersions, MinimumObjectAge, 
WriteObjectTags, WriteUserMetadata, RequesterPays});
+  // Set the supported relationships
+  setSupportedRelationships({Success});
+}
+
+void ListS3::onSchedule(const std::shared_ptr<core::ProcessContext> &context, 
const std::shared_ptr<core::ProcessSessionFactory> &sessionFactory) {
+  S3Processor::onSchedule(context, sessionFactory);
+
+  state_manager_ = context->getStateManager();
+  if (state_manager_ == nullptr) {
+    throw Exception(PROCESSOR_EXCEPTION, "Failed to get StateManager");
+  }
+
+  auto common_properties = getCommonELSupportedProperties(context, nullptr);
+  if (!common_properties) {
+    throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Required property is not set 
or invalid");
+  }
+  configureS3Wrapper(common_properties.value());
+  list_request_params_.bucket = common_properties->bucket;
+
+  context->getProperty(Delimiter.getName(), list_request_params_.delimiter);
+  logger_->log_debug("ListS3: Delimiter [%s]", list_request_params_.delimiter);
+
+  context->getProperty(Prefix.getName(), list_request_params_.prefix);
+  logger_->log_debug("ListS3: Prefix [%s]", list_request_params_.prefix);
+
+  context->getProperty(UseVersions.getName(), 
list_request_params_.use_versions);
+  logger_->log_debug("ListS3: UseVersions [%s]", 
list_request_params_.use_versions ? "true" : "false");
+
+  std::string min_obj_age_str;
+  if (!context->getProperty(MinimumObjectAge.getName(), min_obj_age_str) || 
min_obj_age_str.empty() || 
!core::Property::getTimeMSFromString(min_obj_age_str, 
list_request_params_.min_object_age)) {
+    throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Minimum Object Age missing or 
invalid");
+  }
+  logger_->log_debug("S3Processor: Minimum Object Age [%llud]", 
min_obj_age_str, list_request_params_.min_object_age);
+
+  context->getProperty(WriteObjectTags.getName(), write_object_tags_);
+  logger_->log_debug("ListS3: WriteObjectTags [%s]", write_object_tags_ ? 
"true" : "false");
+
+  context->getProperty(WriteUserMetadata.getName(), write_user_metadata_);
+  logger_->log_debug("ListS3: WriteUserMetadata [%s]", write_user_metadata_ ? 
"true" : "false");
+
+  context->getProperty(RequesterPays.getName(), requester_pays_);
+  logger_->log_debug("ListS3: RequesterPays [%s]", requester_pays_ ? "true" : 
"false");
+}
+
+void ListS3::writeObjectTags(
+    const std::string &bucket,
+    const aws::s3::ListedObjectAttributes &object_attributes,
+    const std::shared_ptr<core::ProcessSession> &session,
+    const std::shared_ptr<core::FlowFile> &flow_file) {
+  if (!write_object_tags_) {
+    return;
+  }
+
+  auto get_object_tags_result = s3_wrapper_.getObjectTags(bucket, 
object_attributes.filename, object_attributes.version);
+  if (get_object_tags_result) {
+    for (const auto& tag : get_object_tags_result.value()) {
+      session->putAttribute(flow_file, "s3.tag." + tag.first, tag.second);
+    }
+  } else {
+    logger_->log_warn("Failed to get object tags for object %s in bucket %s", 
object_attributes.filename, bucket);
+  }
+}
+
+void ListS3::writeUserMetadata(
+    const aws::s3::ListedObjectAttributes &object_attributes,
+    const std::shared_ptr<core::ProcessSession> &session,
+    const std::shared_ptr<core::FlowFile> &flow_file) {
+  if (!write_user_metadata_) {
+    return;
+  }
+
+  aws::s3::HeadObjectRequestParameters params;
+  params.bucket = list_request_params_.bucket;
+  params.object_key = object_attributes.filename;
+  params.version = object_attributes.version;
+  params.requester_pays = requester_pays_;
+  auto head_object_tags_result = s3_wrapper_.headObject(params);
+  if (head_object_tags_result) {
+    for (const auto& metadata : head_object_tags_result->user_metadata_map) {
+      session->putAttribute(flow_file, "s3.user.metadata." + metadata.first, 
metadata.second);
+    }
+  } else {
+    logger_->log_warn("Failed to get object metadata for object %s in bucket 
%s", params.object_key, params.bucket);
+  }
+}
+
+std::vector<std::string> ListS3::getLatestListedKeys(const 
std::unordered_map<std::string, std::string> &state) {
+  std::vector<std::string> latest_listed_keys;
+  for (const auto& kvp : state) {
+    if (kvp.first.rfind(LATEST_LISTED_KEY_PREFIX, 0) == 0) {
+      latest_listed_keys.push_back(kvp.second);
+    }
+  }
+  return latest_listed_keys;
+}
+
+uint64_t ListS3::getLatestListedKeyTimestamp(const 
std::unordered_map<std::string, std::string> &state) {
+  std::string stored_listed_key_timestamp_str;
+  auto it = state.find(LATEST_LISTED_KEY_TIMESTAMP);
+  if (it != state.end()) {
+    stored_listed_key_timestamp_str = it->second;
+  }
+
+  int64_t stored_listed_key_timestamp = 0;
+  core::Property::StringToInt(stored_listed_key_timestamp_str, 
stored_listed_key_timestamp);
+
+  return stored_listed_key_timestamp;
+}
+
+ListS3::ListingState ListS3::getCurrentState(const 
std::shared_ptr<core::ProcessContext> &context) {
+  ListS3::ListingState current_listing_state;
+  std::unordered_map<std::string, std::string> state;
+  if (!state_manager_->get(state)) {
+    logger_->log_info("No stored state for listed objects was found");
+    return current_listing_state;
+  }
+
+  current_listing_state.listed_key_timestamp = 
getLatestListedKeyTimestamp(state);
+  logger_->log_debug("Restored previous listed timestamp %lld", 
current_listing_state.listed_key_timestamp);
+
+  current_listing_state.listed_keys = getLatestListedKeys(state);
+  return current_listing_state;
+}
+
+void ListS3::storeState(const std::shared_ptr<core::ProcessContext> &context, 
const ListS3::ListingState &latest_listing_state) {
+  std::unordered_map<std::string, std::string> state;
+  state[LATEST_LISTED_KEY_TIMESTAMP] = 
std::to_string(latest_listing_state.listed_key_timestamp);
+  for (std::size_t i = 0; i < latest_listing_state.listed_keys.size(); ++i) {
+    state[LATEST_LISTED_KEY_PREFIX + std::to_string(i)] = 
latest_listing_state.listed_keys.at(i);
+  }
+  logger_->log_debug("Stored new listed timestamp %lld", 
latest_listing_state.listed_key_timestamp);
+  state_manager_->set(state);
+}
+
+void ListS3::createNewFlowFile(
+    const std::shared_ptr<core::ProcessSession> &session,
+    const aws::s3::ListedObjectAttributes &object_attributes) {
+  auto flow_file = session->create();
+  session->putAttribute(flow_file, "s3.bucket", list_request_params_.bucket);
+  session->putAttribute(flow_file, core::SpecialFlowAttribute::FILENAME, 
object_attributes.filename);
+  session->putAttribute(flow_file, "s3.etag", object_attributes.etag);
+  session->putAttribute(flow_file, "s3.isLatest", object_attributes.is_latest 
? "true" : "false");
+  session->putAttribute(flow_file, "s3.lastModified", 
std::to_string(object_attributes.last_modified));
+  session->putAttribute(flow_file, "s3.length", 
std::to_string(object_attributes.length));
+  session->putAttribute(flow_file, "s3.storeClass", 
object_attributes.store_class);
+  if (!object_attributes.version.empty()) {
+    session->putAttribute(flow_file, "s3.version", object_attributes.version);
+  }
+  writeObjectTags(list_request_params_.bucket, object_attributes, session, 
flow_file);
+  writeUserMetadata(object_attributes, session, flow_file);
+
+  session->transfer(flow_file, Success);
+}
+
+void ListS3::onTrigger(const std::shared_ptr<core::ProcessContext> &context, 
const std::shared_ptr<core::ProcessSession> &session) {
+  logger_->log_debug("ListS3 onTrigger");
+
+  auto aws_results = s3_wrapper_.listBucket(list_request_params_);
+  if (!aws_results) {
+    logger_->log_error("Failed to list S3 bucket %s", 
list_request_params_.bucket);
+    context->yield();
+    return;
+  }
+
+  if (aws_results->size() == 0) {
+    logger_->log_debug("No S3 object were found in bucket %s", 
list_request_params_.bucket);
+    return;

Review comment:
       I'm not sure. Even though we could yield when we did not get any data, 
just to wait some more time for additional objects, we cannot be sure that the 
next scheduled run will not provide new data.

##########
File path: extensions/aws/s3/S3Wrapper.h
##########
@@ -31,11 +47,190 @@ namespace minifi {
 namespace aws {
 namespace s3 {
 
-class S3Wrapper : public S3WrapperBase {
+static const std::unordered_map<std::string, Aws::S3::Model::StorageClass> 
STORAGE_CLASS_MAP {
+  {"Standard", Aws::S3::Model::StorageClass::STANDARD},
+  {"ReducedRedundancy", Aws::S3::Model::StorageClass::REDUCED_REDUNDANCY},
+  {"StandardIA", Aws::S3::Model::StorageClass::STANDARD_IA},
+  {"OnezoneIA", Aws::S3::Model::StorageClass::ONEZONE_IA},
+  {"IntelligentTiering", Aws::S3::Model::StorageClass::INTELLIGENT_TIERING},
+  {"Glacier", Aws::S3::Model::StorageClass::GLACIER},
+  {"DeepArchive", Aws::S3::Model::StorageClass::DEEP_ARCHIVE}
+};
+
+static const std::unordered_map<Aws::S3::Model::ObjectStorageClass, 
std::string> OBJECT_STORAGE_CLASS_MAP {
+  {Aws::S3::Model::ObjectStorageClass::STANDARD, "Standard"},
+  {Aws::S3::Model::ObjectStorageClass::REDUCED_REDUNDANCY, 
"ReducedRedundancy"},
+  {Aws::S3::Model::ObjectStorageClass::STANDARD_IA, "StandardIA"},
+  {Aws::S3::Model::ObjectStorageClass::ONEZONE_IA, "OnezoneIA"},
+  {Aws::S3::Model::ObjectStorageClass::INTELLIGENT_TIERING, 
"IntelligentTiering"},
+  {Aws::S3::Model::ObjectStorageClass::GLACIER, "Glacier"},
+  {Aws::S3::Model::ObjectStorageClass::DEEP_ARCHIVE, "DeepArchive"}
+};
+
+static const std::unordered_map<Aws::S3::Model::ObjectVersionStorageClass, 
std::string> VERSION_STORAGE_CLASS_MAP {
+  {Aws::S3::Model::ObjectVersionStorageClass::STANDARD, "Standard"}
+};
+
+static const std::unordered_map<std::string, 
Aws::S3::Model::ServerSideEncryption> SERVER_SIDE_ENCRYPTION_MAP {
+  {"None", Aws::S3::Model::ServerSideEncryption::NOT_SET},
+  {"AES256", Aws::S3::Model::ServerSideEncryption::AES256},
+  {"aws_kms", Aws::S3::Model::ServerSideEncryption::aws_kms},
+};
+
+static const std::unordered_map<std::string, Aws::S3::Model::ObjectCannedACL> 
CANNED_ACL_MAP {
+  {"BucketOwnerFullControl", 
Aws::S3::Model::ObjectCannedACL::bucket_owner_full_control},
+  {"BucketOwnerRead", Aws::S3::Model::ObjectCannedACL::bucket_owner_read},
+  {"AuthenticatedRead", Aws::S3::Model::ObjectCannedACL::authenticated_read},
+  {"PublicReadWrite", Aws::S3::Model::ObjectCannedACL::public_read_write},
+  {"PublicRead", Aws::S3::Model::ObjectCannedACL::public_read},
+  {"Private", Aws::S3::Model::ObjectCannedACL::private_},
+  {"AwsExecRead", Aws::S3::Model::ObjectCannedACL::aws_exec_read},
+};
+
+struct Expiration {
+  std::string expiration_time;
+  std::string expiration_time_rule_id;
+};
+
+struct PutObjectResult {
+  std::string version;
+  std::string etag;
+  std::string expiration;
+  std::string ssealgorithm;
+};
+
+struct PutObjectRequestParameters {
+  std::string bucket;
+  std::string object_key;
+  std::string storage_class;
+  std::string server_side_encryption;
+  std::string content_type;
+  std::map<std::string, std::string> user_metadata_map;
+  std::string fullcontrol_user_list;
+  std::string read_permission_user_list;
+  std::string read_acl_user_list;
+  std::string write_acl_user_list;
+  std::string canned_acl;
+};
+
+struct GetObjectRequestParameters {
+  std::string bucket;
+  std::string object_key;
+  std::string version;
+  bool requester_pays = false;
+};
+
+struct HeadObjectResult {
+  std::string path;
+  std::string absolute_path;
+  std::string filename;
+  std::string mime_type;
+  std::string etag;
+  Expiration expiration;
+  std::string ssealgorithm;
+  std::string version;
+  std::map<std::string, std::string> user_metadata_map;
+
+  void setFilePaths(const std::string& key);
+};
+
+struct GetObjectResult : public HeadObjectResult {
+  int64_t write_size = 0;
+};
+
+struct ListRequestParameters {
+  std::string bucket;
+  std::string delimiter;
+  std::string prefix;
+  bool use_versions = false;
+  uint64_t min_object_age = 0;
+};
+
+struct ListedObjectAttributes {
+  std::string filename;
+  std::string etag;
+  bool is_latest = false;
+  int64_t last_modified = 0;
+  int length = 0;
+  std::string store_class;
+  std::string version;

Review comment:
       I don't think we would benefit from making this field optional in this 
case: an empty version is not a valid value, so we cannot really distinguish an 
empty version from an unset version; they should mean the same thing. I think 
it is easier here to just check for an empty string if we want to know whether 
it has been set.

##########
File path: extensions/aws/processors/ListS3.cpp
##########
@@ -0,0 +1,294 @@
+/**
+ * @file ListS3.cpp
+ * ListS3 class implementation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ListS3.h"
+
+#include <tuple>
+#include <algorithm>
+#include <set>
+#include <utility>
+#include <memory>
+
+#include "utils/StringUtils.h"
+
+namespace org {
+namespace apache {
+namespace nifi {
+namespace minifi {
+namespace aws {
+namespace processors {
+
+const std::string ListS3::LATEST_LISTED_KEY_PREFIX = "listed_key.";
+const std::string ListS3::LATEST_LISTED_KEY_TIMESTAMP = "listed_key.timestamp";
+
+const core::Property ListS3::Delimiter(
+  core::PropertyBuilder::createProperty("Delimiter")
+    ->withDescription("The string used to delimit directories within the 
bucket. Please consult the AWS documentation for the correct use of this 
field.")
+    ->build());
+const core::Property ListS3::Prefix(
+  core::PropertyBuilder::createProperty("Prefix")
+    ->withDescription("The prefix used to filter the object list. In most 
cases, it should end with a forward slash ('/').")
+    ->build());
+const core::Property ListS3::UseVersions(
+  core::PropertyBuilder::createProperty("Use Versions")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("Specifies whether to use S3 versions, if applicable. If 
false, only the latest version of each object will be returned.")
+    ->build());
+const core::Property ListS3::MinimumObjectAge(
+  core::PropertyBuilder::createProperty("Minimum Object Age")
+    ->isRequired(true)
+    ->withDefaultValue<core::TimePeriodValue>("0 sec")
+    ->withDescription("The minimum age that an S3 object must be in order to 
be considered; any object younger than this amount of time (according to last 
modification date) will be ignored.")
+    ->build());
+const core::Property ListS3::WriteObjectTags(
+  core::PropertyBuilder::createProperty("Write Object Tags")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("If set to 'true', the tags associated with the S3 
object will be written as FlowFile attributes.")
+    ->build());
+const core::Property ListS3::WriteUserMetadata(
+  core::PropertyBuilder::createProperty("Write User Metadata")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("If set to 'true', the user defined metadata associated 
with the S3 object will be added to FlowFile attributes/records.")
+    ->build());
+const core::Property ListS3::RequesterPays(
+  core::PropertyBuilder::createProperty("Requester Pays")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("If true, indicates that the requester consents to pay 
any charges associated with listing the S3 bucket. This sets the 
'x-amz-request-payer' header to 'requester'. "
+                      "Note that this setting is only used if Write User 
Metadata is true.")
+    ->build());
+
+const core::Relationship ListS3::Success("success", "FlowFiles are routed to 
success relationship");
+
+void ListS3::initialize() {
+  // Add new supported properties
+  updateSupportedProperties({Delimiter, Prefix, UseVersions, MinimumObjectAge, 
WriteObjectTags, WriteUserMetadata, RequesterPays});
+  // Set the supported relationships
+  setSupportedRelationships({Success});
+}
+
+void ListS3::onSchedule(const std::shared_ptr<core::ProcessContext> &context, 
const std::shared_ptr<core::ProcessSessionFactory> &sessionFactory) {
+  S3Processor::onSchedule(context, sessionFactory);
+
+  state_manager_ = context->getStateManager();
+  if (state_manager_ == nullptr) {
+    throw Exception(PROCESSOR_EXCEPTION, "Failed to get StateManager");
+  }
+
+  auto common_properties = getCommonELSupportedProperties(context, nullptr);

Review comment:
       As far as I can see, when getProperty is called with nullptr, a 
minifi::expression::Parameters is created with a weak_ptr from the passed 
flow_file (where the default value is also nullptr) before evaluating the 
expression. Every time the flow_file is used, it tries to lock the weak_ptr 
and only uses it when the lock succeeds, so it should be safe.

##########
File path: extensions/aws/s3/S3Wrapper.cpp
##########
@@ -30,46 +37,253 @@ namespace minifi {
 namespace aws {
 namespace s3 {
 
-minifi::utils::optional<Aws::S3::Model::PutObjectResult> 
S3Wrapper::sendPutObjectRequest(const Aws::S3::Model::PutObjectRequest& 
request) {
-  Aws::S3::S3Client s3_client(credentials_, client_config_);
-  auto outcome = s3_client.PutObject(request);
+void HeadObjectResult::setFilePaths(const std::string& key) {
+  absolute_path = key;
+  std::tie(path, filename) = minifi::utils::file::FileUtils::split_path(key, 
true /*force_posix*/);
+}
+
+S3Wrapper::S3Wrapper() : 
request_sender_(minifi::utils::make_unique<S3ClientRequestSender>()) {
+}
+
+S3Wrapper::S3Wrapper(std::unique_ptr<S3RequestSender> request_sender) : 
request_sender_(std::move(request_sender)) {
+}
+
+void S3Wrapper::setCredentials(const Aws::Auth::AWSCredentials& cred) {
+  request_sender_->setCredentials(cred);
+}
+
+void S3Wrapper::setRegion(const Aws::String& region) {
+  request_sender_->setRegion(region);
+}
+
+void S3Wrapper::setTimeout(uint64_t timeout) {
+  request_sender_->setTimeout(timeout);
+}
+
+void S3Wrapper::setEndpointOverrideUrl(const Aws::String& url) {
+  request_sender_->setEndpointOverrideUrl(url);
+}
+
+void S3Wrapper::setProxy(const ProxyOptions& proxy) {
+  request_sender_->setProxy(proxy);
+}
+
+void S3Wrapper::setCannedAcl(Aws::S3::Model::PutObjectRequest& request, const 
std::string& canned_acl) const {
+  if (canned_acl.empty() || CANNED_ACL_MAP.find(canned_acl) == 
CANNED_ACL_MAP.end())
+    return;
+
+  logger_->log_debug("Setting AWS canned ACL [%s]", canned_acl);
+  request.SetACL(CANNED_ACL_MAP.at(canned_acl));
+}
+
+Expiration S3Wrapper::getExpiration(const std::string& expiration) {
+  minifi::utils::Regex expr("expiry-date=\"(.*)\", rule-id=\"(.*)\"");
+  const auto match = expr.match(expiration);
+  const auto& results = expr.getResult();
+  if (!match || results.size() < 3)
+    return Expiration{};
+  return Expiration{results[1], results[2]};
+}
+
+std::string 
S3Wrapper::getEncryptionString(Aws::S3::Model::ServerSideEncryption encryption) 
{
+  if (encryption == Aws::S3::Model::ServerSideEncryption::NOT_SET) {
+    return "";
+  }
+
+  auto it = std::find_if(SERVER_SIDE_ENCRYPTION_MAP.begin(), 
SERVER_SIDE_ENCRYPTION_MAP.end(),
+    [&](const std::pair<std::string, const 
Aws::S3::Model::ServerSideEncryption&> pair) {
+      return pair.second == encryption;
+    });
+  if (it != SERVER_SIDE_ENCRYPTION_MAP.end()) {
+    return it->first;
+  }
+  return "";
+}
+
+minifi::utils::optional<PutObjectResult> S3Wrapper::putObject(const 
PutObjectRequestParameters& put_object_params, std::shared_ptr<Aws::IOStream> 
data_stream) {
+  Aws::S3::Model::PutObjectRequest request;
+  request.SetBucket(put_object_params.bucket);
+  request.SetKey(put_object_params.object_key);
+  
request.SetStorageClass(STORAGE_CLASS_MAP.at(put_object_params.storage_class));
+  
request.SetServerSideEncryption(SERVER_SIDE_ENCRYPTION_MAP.at(put_object_params.server_side_encryption));
+  request.SetContentType(put_object_params.content_type);
+  request.SetMetadata(put_object_params.user_metadata_map);
+  request.SetBody(data_stream);
+  request.SetGrantFullControl(put_object_params.fullcontrol_user_list);
+  request.SetGrantRead(put_object_params.read_permission_user_list);
+  request.SetGrantReadACP(put_object_params.read_acl_user_list);
+  request.SetGrantWriteACP(put_object_params.write_acl_user_list);
+  setCannedAcl(request, put_object_params.canned_acl);
+
+  auto aws_result = request_sender_->sendPutObjectRequest(request);
+  if (!aws_result) {
+    return minifi::utils::nullopt;
+  }
+
+  PutObjectResult result;
+  // Etags are returned by AWS in quoted form that should be removed
+  result.etag = 
minifi::utils::StringUtils::removeFramingCharacters(aws_result->GetETag(), '"');
+  result.version = aws_result->GetVersionId();
+
+  // GetExpiration returns a string pair with a date and a ruleid in 
'expiry-date=\"<DATE>\", rule-id=\"<RULEID>\"' format
+  // s3.expiration only needs the date member of this pair
+  result.expiration = 
getExpiration(aws_result->GetExpiration()).expiration_time;
+  result.ssealgorithm = 
getEncryptionString(aws_result->GetServerSideEncryption());
+  return result;
+}
+
+bool S3Wrapper::deleteObject(const std::string& bucket, const std::string& 
object_key, const std::string& version) {
+  Aws::S3::Model::DeleteObjectRequest request;
+  request.SetBucket(bucket);
+  request.SetKey(object_key);
+  if (!version.empty()) {
+    request.SetVersionId(version);
+  }
+  return request_sender_->sendDeleteObjectRequest(request);
+}
+
+int64_t S3Wrapper::writeFetchedBody(Aws::IOStream& source, const int64_t 
data_size, const std::shared_ptr<io::BaseStream>& output) {
+  static const uint64_t BUFFER_SIZE = 4096;
+  std::vector<uint8_t> buffer;
+  buffer.reserve(BUFFER_SIZE);
 
-  if (outcome.IsSuccess()) {
-      logger_->log_info("Added S3 object '%s' to bucket '%s'", 
request.GetKey(), request.GetBucket());
-      return outcome.GetResultWithOwnership();
-  } else {
-    logger_->log_error("PutS3Object failed with the following: '%s'", 
outcome.GetError().GetMessage());
+  int64_t write_size = 0;
+  while (write_size < data_size) {
+    auto next_write_size = data_size - write_size < BUFFER_SIZE ? data_size - 
write_size : BUFFER_SIZE;
+    if (!source.read(reinterpret_cast<char*>(buffer.data()), next_write_size)) 
{
+      return -1;
+    }
+    auto ret = output->write(buffer.data(), next_write_size);
+    if (ret < 0) {
+      return ret;
+    }
+    write_size += next_write_size;
+  }
+  return write_size;
+}
+
+minifi::utils::optional<GetObjectResult> S3Wrapper::getObject(const 
GetObjectRequestParameters& get_object_params, const 
std::shared_ptr<io::BaseStream>& out_body) {
+  auto request = 
createFetchObjectRequest<Aws::S3::Model::GetObjectRequest>(get_object_params);
+  auto aws_result = request_sender_->sendGetObjectRequest(request);
+  if (!aws_result) {
     return minifi::utils::nullopt;
   }
+  auto result = fillFetchObjectResult<Aws::S3::Model::GetObjectResult, 
GetObjectResult>(get_object_params, aws_result.value());
+  result.write_size = writeFetchedBody(aws_result->GetBody(), 
aws_result->GetContentLength(), out_body);
+  return result;
+}
+
+void S3Wrapper::addListResults(const 
Aws::Vector<Aws::S3::Model::ObjectVersion>& content, const uint64_t 
min_object_age, std::vector<ListedObjectAttributes>& listed_objects) {
+  for (const auto& version : content) {
+    if (last_bucket_list_timestamp_ - min_object_age < 
version.GetLastModified().Millis()) {
+      logger_->log_debug("Object version '%s' of key '%s' skipped due to 
minimum object age filter", version.GetVersionId(), version.GetKey());
+      continue;
+    }
+
+    ListedObjectAttributes attributes;
+    attributes.etag = 
minifi::utils::StringUtils::removeFramingCharacters(version.GetETag(), '"');
+    attributes.filename = version.GetKey();
+    attributes.is_latest = version.GetIsLatest();
+    attributes.last_modified = version.GetLastModified().Millis();
+    attributes.length = version.GetSize();
+    attributes.store_class = 
VERSION_STORAGE_CLASS_MAP.at(version.GetStorageClass());
+    attributes.version = version.GetVersionId();
+    listed_objects.push_back(attributes);
+  }
 }
 
-bool S3Wrapper::sendDeleteObjectRequest(const 
Aws::S3::Model::DeleteObjectRequest& request) {
-  Aws::S3::S3Client s3_client(credentials_, client_config_);
-  Aws::S3::Model::DeleteObjectOutcome outcome = 
s3_client.DeleteObject(request);
+void S3Wrapper::addListResults(const Aws::Vector<Aws::S3::Model::Object>& 
content, const uint64_t min_object_age, std::vector<ListedObjectAttributes>& 
listed_objects) {
+  for (const auto& object : content) {
+    if (last_bucket_list_timestamp_ - min_object_age < 
object.GetLastModified().Millis()) {
+      logger_->log_debug("Object with key '%s' skipped due to minimum object 
age filter", object.GetKey());
+      continue;
+    }
 
-  if (outcome.IsSuccess()) {
-    logger_->log_info("Deleted S3 object '%s' from bucket '%s'", 
request.GetKey(), request.GetBucket());
-    return true;
-  } else if (outcome.GetError().GetErrorType() == 
Aws::S3::S3Errors::NO_SUCH_KEY) {
-    logger_->log_info("S3 object '%s' was not found in bucket '%s'", 
request.GetKey(), request.GetBucket());
-    return true;
-  } else {
-    logger_->log_error("DeleteS3Object failed with the following: '%s'", 
outcome.GetError().GetMessage());
-    return false;
+    ListedObjectAttributes attributes;
+    attributes.etag = 
minifi::utils::StringUtils::removeFramingCharacters(object.GetETag(), '"');
+    attributes.filename = object.GetKey();
+    attributes.is_latest = true;
+    attributes.last_modified = object.GetLastModified().Millis();
+    attributes.length = object.GetSize();
+    attributes.store_class = 
OBJECT_STORAGE_CLASS_MAP.at(object.GetStorageClass());
+    listed_objects.push_back(attributes);
   }
 }
 
-minifi::utils::optional<Aws::S3::Model::GetObjectResult> 
S3Wrapper::sendGetObjectRequest(const Aws::S3::Model::GetObjectRequest& 
request) {
-  Aws::S3::S3Client s3_client(credentials_, client_config_);
-  auto outcome = s3_client.GetObject(request);
+minifi::utils::optional<std::vector<ListedObjectAttributes>> 
S3Wrapper::listVersions(const ListRequestParameters& params) {
+  auto request = 
createListRequest<Aws::S3::Model::ListObjectVersionsRequest>(params);
+  std::vector<ListedObjectAttributes> attribute_list;
+  nonstd::optional_lite::optional<Aws::S3::Model::ListObjectVersionsResult> 
aws_result;
+  do {
+    aws_result = request_sender_->sendListVersionsRequest(request);
+    if (!aws_result) {
+      return minifi::utils::nullopt;
+    }
+    const auto& versions = aws_result->GetVersions();
+    logger_->log_debug("AWS S3 List operation returned %zu versions. This 
result is truncated: %s", versions.size(), aws_result->GetIsTruncated() ? 
"true" : "false");
+    addListResults(versions, params.min_object_age, attribute_list);
+    if (aws_result->GetIsTruncated()) {
+      request.SetKeyMarker(aws_result->GetNextKeyMarker());
+      request.SetVersionIdMarker(aws_result->GetNextVersionIdMarker());
+    }
+  } while (aws_result->GetIsTruncated());
+
+  return attribute_list;
+}
+
+minifi::utils::optional<std::vector<ListedObjectAttributes>> 
S3Wrapper::listObjects(const ListRequestParameters& params) {
+  auto request = 
createListRequest<Aws::S3::Model::ListObjectsV2Request>(params);
+  std::vector<ListedObjectAttributes> attribute_list;
+  nonstd::optional_lite::optional<Aws::S3::Model::ListObjectsV2Result> 
aws_result;
+  do {
+    aws_result = request_sender_->sendListObjectsRequest(request);
+    if (!aws_result) {
+      return minifi::utils::nullopt;
+    }
+    const auto& objects = aws_result->GetContents();
+    logger_->log_debug("AWS S3 List operation returned %d objects. This result 
is truncated: %s", objects.size(), aws_result->GetIsTruncated() ? "true" : 
"false");

Review comment:
       Fixed in 
[27ff518](https://github.com/apache/nifi-minifi-cpp/pull/975/commits/27ff518407185f7876d96dfac2566bfae9eda62b)

##########
File path: extensions/aws/s3/S3Wrapper.cpp
##########
@@ -30,46 +37,253 @@ namespace minifi {
 namespace aws {
 namespace s3 {
 
-minifi::utils::optional<Aws::S3::Model::PutObjectResult> 
S3Wrapper::sendPutObjectRequest(const Aws::S3::Model::PutObjectRequest& 
request) {
-  Aws::S3::S3Client s3_client(credentials_, client_config_);
-  auto outcome = s3_client.PutObject(request);
+void HeadObjectResult::setFilePaths(const std::string& key) {
+  absolute_path = key;
+  std::tie(path, filename) = minifi::utils::file::FileUtils::split_path(key, 
true /*force_posix*/);
+}
+
+S3Wrapper::S3Wrapper() : 
request_sender_(minifi::utils::make_unique<S3ClientRequestSender>()) {
+}
+
+S3Wrapper::S3Wrapper(std::unique_ptr<S3RequestSender> request_sender) : 
request_sender_(std::move(request_sender)) {
+}
+
+void S3Wrapper::setCredentials(const Aws::Auth::AWSCredentials& cred) {
+  request_sender_->setCredentials(cred);
+}
+
+void S3Wrapper::setRegion(const Aws::String& region) {
+  request_sender_->setRegion(region);
+}
+
+void S3Wrapper::setTimeout(uint64_t timeout) {
+  request_sender_->setTimeout(timeout);
+}
+
+void S3Wrapper::setEndpointOverrideUrl(const Aws::String& url) {
+  request_sender_->setEndpointOverrideUrl(url);
+}
+
+void S3Wrapper::setProxy(const ProxyOptions& proxy) {
+  request_sender_->setProxy(proxy);
+}
+
+void S3Wrapper::setCannedAcl(Aws::S3::Model::PutObjectRequest& request, const 
std::string& canned_acl) const {
+  if (canned_acl.empty() || CANNED_ACL_MAP.find(canned_acl) == 
CANNED_ACL_MAP.end())
+    return;
+
+  logger_->log_debug("Setting AWS canned ACL [%s]", canned_acl);
+  request.SetACL(CANNED_ACL_MAP.at(canned_acl));
+}
+
+Expiration S3Wrapper::getExpiration(const std::string& expiration) {
+  minifi::utils::Regex expr("expiry-date=\"(.*)\", rule-id=\"(.*)\"");
+  const auto match = expr.match(expiration);
+  const auto& results = expr.getResult();
+  if (!match || results.size() < 3)
+    return Expiration{};
+  return Expiration{results[1], results[2]};
+}
+
+std::string 
S3Wrapper::getEncryptionString(Aws::S3::Model::ServerSideEncryption encryption) 
{
+  if (encryption == Aws::S3::Model::ServerSideEncryption::NOT_SET) {
+    return "";
+  }
+
+  auto it = std::find_if(SERVER_SIDE_ENCRYPTION_MAP.begin(), 
SERVER_SIDE_ENCRYPTION_MAP.end(),
+    [&](const std::pair<std::string, const 
Aws::S3::Model::ServerSideEncryption&> pair) {
+      return pair.second == encryption;
+    });
+  if (it != SERVER_SIDE_ENCRYPTION_MAP.end()) {
+    return it->first;
+  }
+  return "";
+}
+
+minifi::utils::optional<PutObjectResult> S3Wrapper::putObject(const 
PutObjectRequestParameters& put_object_params, std::shared_ptr<Aws::IOStream> 
data_stream) {
+  Aws::S3::Model::PutObjectRequest request;
+  request.SetBucket(put_object_params.bucket);
+  request.SetKey(put_object_params.object_key);
+  
request.SetStorageClass(STORAGE_CLASS_MAP.at(put_object_params.storage_class));
+  
request.SetServerSideEncryption(SERVER_SIDE_ENCRYPTION_MAP.at(put_object_params.server_side_encryption));
+  request.SetContentType(put_object_params.content_type);
+  request.SetMetadata(put_object_params.user_metadata_map);
+  request.SetBody(data_stream);
+  request.SetGrantFullControl(put_object_params.fullcontrol_user_list);
+  request.SetGrantRead(put_object_params.read_permission_user_list);
+  request.SetGrantReadACP(put_object_params.read_acl_user_list);
+  request.SetGrantWriteACP(put_object_params.write_acl_user_list);
+  setCannedAcl(request, put_object_params.canned_acl);
+
+  auto aws_result = request_sender_->sendPutObjectRequest(request);
+  if (!aws_result) {
+    return minifi::utils::nullopt;
+  }
+
+  PutObjectResult result;
+  // Etags are returned by AWS in quoted form that should be removed
+  result.etag = 
minifi::utils::StringUtils::removeFramingCharacters(aws_result->GetETag(), '"');
+  result.version = aws_result->GetVersionId();
+
+  // GetExpiration returns a string pair with a date and a ruleid in 
'expiry-date=\"<DATE>\", rule-id=\"<RULEID>\"' format
+  // s3.expiration only needs the date member of this pair
+  result.expiration = 
getExpiration(aws_result->GetExpiration()).expiration_time;
+  result.ssealgorithm = 
getEncryptionString(aws_result->GetServerSideEncryption());
+  return result;
+}
+
+bool S3Wrapper::deleteObject(const std::string& bucket, const std::string& 
object_key, const std::string& version) {
+  Aws::S3::Model::DeleteObjectRequest request;
+  request.SetBucket(bucket);
+  request.SetKey(object_key);
+  if (!version.empty()) {
+    request.SetVersionId(version);
+  }
+  return request_sender_->sendDeleteObjectRequest(request);
+}
+
+int64_t S3Wrapper::writeFetchedBody(Aws::IOStream& source, const int64_t 
data_size, const std::shared_ptr<io::BaseStream>& output) {
+  static const uint64_t BUFFER_SIZE = 4096;
+  std::vector<uint8_t> buffer;
+  buffer.reserve(BUFFER_SIZE);
 
-  if (outcome.IsSuccess()) {
-      logger_->log_info("Added S3 object '%s' to bucket '%s'", 
request.GetKey(), request.GetBucket());
-      return outcome.GetResultWithOwnership();
-  } else {
-    logger_->log_error("PutS3Object failed with the following: '%s'", 
outcome.GetError().GetMessage());
+  int64_t write_size = 0;
+  while (write_size < data_size) {
+    auto next_write_size = data_size - write_size < BUFFER_SIZE ? data_size - 
write_size : BUFFER_SIZE;

Review comment:
       Fixed in 
[27ff518](https://github.com/apache/nifi-minifi-cpp/pull/975/commits/27ff518407185f7876d96dfac2566bfae9eda62b)

##########
File path: extensions/aws/s3/S3Wrapper.cpp
##########
@@ -30,46 +37,253 @@ namespace minifi {
 namespace aws {
 namespace s3 {
 
-minifi::utils::optional<Aws::S3::Model::PutObjectResult> 
S3Wrapper::sendPutObjectRequest(const Aws::S3::Model::PutObjectRequest& 
request) {
-  Aws::S3::S3Client s3_client(credentials_, client_config_);
-  auto outcome = s3_client.PutObject(request);
+void HeadObjectResult::setFilePaths(const std::string& key) {
+  absolute_path = key;
+  std::tie(path, filename) = minifi::utils::file::FileUtils::split_path(key, 
true /*force_posix*/);
+}
+
+S3Wrapper::S3Wrapper() : 
request_sender_(minifi::utils::make_unique<S3ClientRequestSender>()) {
+}
+
+S3Wrapper::S3Wrapper(std::unique_ptr<S3RequestSender> request_sender) : 
request_sender_(std::move(request_sender)) {
+}
+
+void S3Wrapper::setCredentials(const Aws::Auth::AWSCredentials& cred) {
+  request_sender_->setCredentials(cred);
+}
+
+void S3Wrapper::setRegion(const Aws::String& region) {
+  request_sender_->setRegion(region);
+}
+
+void S3Wrapper::setTimeout(uint64_t timeout) {
+  request_sender_->setTimeout(timeout);
+}
+
+void S3Wrapper::setEndpointOverrideUrl(const Aws::String& url) {
+  request_sender_->setEndpointOverrideUrl(url);
+}
+
+void S3Wrapper::setProxy(const ProxyOptions& proxy) {
+  request_sender_->setProxy(proxy);
+}
+
+void S3Wrapper::setCannedAcl(Aws::S3::Model::PutObjectRequest& request, const 
std::string& canned_acl) const {
+  if (canned_acl.empty() || CANNED_ACL_MAP.find(canned_acl) == 
CANNED_ACL_MAP.end())
+    return;
+
+  logger_->log_debug("Setting AWS canned ACL [%s]", canned_acl);
+  request.SetACL(CANNED_ACL_MAP.at(canned_acl));
+}
+
+Expiration S3Wrapper::getExpiration(const std::string& expiration) {
+  minifi::utils::Regex expr("expiry-date=\"(.*)\", rule-id=\"(.*)\"");
+  const auto match = expr.match(expiration);
+  const auto& results = expr.getResult();
+  if (!match || results.size() < 3)
+    return Expiration{};
+  return Expiration{results[1], results[2]};
+}
+
+std::string 
S3Wrapper::getEncryptionString(Aws::S3::Model::ServerSideEncryption encryption) 
{
+  if (encryption == Aws::S3::Model::ServerSideEncryption::NOT_SET) {
+    return "";
+  }
+
+  auto it = std::find_if(SERVER_SIDE_ENCRYPTION_MAP.begin(), 
SERVER_SIDE_ENCRYPTION_MAP.end(),
+    [&](const std::pair<std::string, const 
Aws::S3::Model::ServerSideEncryption&> pair) {
+      return pair.second == encryption;
+    });
+  if (it != SERVER_SIDE_ENCRYPTION_MAP.end()) {
+    return it->first;
+  }
+  return "";
+}
+
+minifi::utils::optional<PutObjectResult> S3Wrapper::putObject(const 
PutObjectRequestParameters& put_object_params, std::shared_ptr<Aws::IOStream> 
data_stream) {
+  Aws::S3::Model::PutObjectRequest request;
+  request.SetBucket(put_object_params.bucket);
+  request.SetKey(put_object_params.object_key);
+  
request.SetStorageClass(STORAGE_CLASS_MAP.at(put_object_params.storage_class));
+  
request.SetServerSideEncryption(SERVER_SIDE_ENCRYPTION_MAP.at(put_object_params.server_side_encryption));
+  request.SetContentType(put_object_params.content_type);
+  request.SetMetadata(put_object_params.user_metadata_map);
+  request.SetBody(data_stream);
+  request.SetGrantFullControl(put_object_params.fullcontrol_user_list);
+  request.SetGrantRead(put_object_params.read_permission_user_list);
+  request.SetGrantReadACP(put_object_params.read_acl_user_list);
+  request.SetGrantWriteACP(put_object_params.write_acl_user_list);
+  setCannedAcl(request, put_object_params.canned_acl);
+
+  auto aws_result = request_sender_->sendPutObjectRequest(request);
+  if (!aws_result) {
+    return minifi::utils::nullopt;
+  }
+
+  PutObjectResult result;
+  // Etags are returned by AWS in quoted form that should be removed
+  result.etag = 
minifi::utils::StringUtils::removeFramingCharacters(aws_result->GetETag(), '"');
+  result.version = aws_result->GetVersionId();
+
+  // GetExpiration returns a string pair with a date and a ruleid in 
'expiry-date=\"<DATE>\", rule-id=\"<RULEID>\"' format
+  // s3.expiration only needs the date member of this pair
+  result.expiration = 
getExpiration(aws_result->GetExpiration()).expiration_time;
+  result.ssealgorithm = 
getEncryptionString(aws_result->GetServerSideEncryption());
+  return result;
+}
+
+bool S3Wrapper::deleteObject(const std::string& bucket, const std::string& 
object_key, const std::string& version) {
+  Aws::S3::Model::DeleteObjectRequest request;
+  request.SetBucket(bucket);
+  request.SetKey(object_key);
+  if (!version.empty()) {
+    request.SetVersionId(version);
+  }
+  return request_sender_->sendDeleteObjectRequest(request);
+}
+
+int64_t S3Wrapper::writeFetchedBody(Aws::IOStream& source, const int64_t 
data_size, const std::shared_ptr<io::BaseStream>& output) {
+  static const uint64_t BUFFER_SIZE = 4096;
+  std::vector<uint8_t> buffer;
+  buffer.reserve(BUFFER_SIZE);
 
-  if (outcome.IsSuccess()) {
-      logger_->log_info("Added S3 object '%s' to bucket '%s'", 
request.GetKey(), request.GetBucket());
-      return outcome.GetResultWithOwnership();
-  } else {
-    logger_->log_error("PutS3Object failed with the following: '%s'", 
outcome.GetError().GetMessage());
+  int64_t write_size = 0;
+  while (write_size < data_size) {
+    auto next_write_size = data_size - write_size < BUFFER_SIZE ? data_size - 
write_size : BUFFER_SIZE;
+    if (!source.read(reinterpret_cast<char*>(buffer.data()), next_write_size)) 
{

Review comment:
       Somehow I get a segmentation fault when I use the combination of 
`readsome` and an `eof` check here. I'm not sure of the root cause; I'll try 
to investigate it next week.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to