fgerlits commented on code in PR #1297:
URL: https://github.com/apache/nifi-minifi-cpp/pull/1297#discussion_r873903464


##########
extensions/gcp/processors/DeleteGCSObject.cpp:
##########
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DeleteGCSObject.h"
+
+#include "core/Resource.h"
+#include "core/ProcessContext.h"
+#include "core/ProcessSession.h"
+#include "core/FlowFile.h"
+#include "utils/OptionalUtils.h"
+#include "../GCPAttributes.h"
+
+namespace gcs = ::google::cloud::storage;
+
+namespace org::apache::nifi::minifi::extensions::gcp {
+const core::Property DeleteGCSObject::Bucket(
+    core::PropertyBuilder::createProperty("Bucket")
+        ->withDescription("Bucket of the object.")
+        ->withDefaultValue("${gcs.bucket}")
+        ->supportsExpressionLanguage(true)
+        ->build());
+
+const core::Property DeleteGCSObject::Key(
+    core::PropertyBuilder::createProperty("Key")
+        ->withDescription("Name of the object.")
+        ->withDefaultValue("${filename}")
+        ->supportsExpressionLanguage(true)
+        ->build());
+
+const core::Property DeleteGCSObject::Generation(
+    core::PropertyBuilder::createProperty("Object Generation")
+        ->withDescription("The generation of the object to be deleted. If 
null, will use latest version of the object.")
+        ->supportsExpressionLanguage(false)
+        ->build());
+
+const core::Property DeleteGCSObject::EncryptionKey(
+    core::PropertyBuilder::createProperty("Server Side Encryption Key")
+        ->withDescription("The AES256 Encryption Key (encoded in base64) for 
server-side decryption of the object.")
+        ->isRequired(false)
+        ->supportsExpressionLanguage(true)
+        ->build());
+
+const core::Relationship DeleteGCSObject::Success("success", "FlowFiles are 
routed to this relationship after a successful Google Cloud Storage 
operation.");
+const core::Relationship DeleteGCSObject::Failure("failure", "FlowFiles are 
routed to this relationship if the Google Cloud Storage operation fails.");
+
+void DeleteGCSObject::initialize() {
+  setSupportedProperties({GCPCredentials,
+                          Bucket,
+                          Key,
+                          Generation,
+                          NumberOfRetries,
+                          EncryptionKey,
+                          EndpointOverrideURL});
+  setSupportedRelationships({Success, Failure});
+}
+
+void DeleteGCSObject::onSchedule(const std::shared_ptr<core::ProcessContext>& 
context, const std::shared_ptr<core::ProcessSessionFactory>& session_factory) {
+  GCSProcessor::onSchedule(context, session_factory);
+}
+
+void DeleteGCSObject::onTrigger(const std::shared_ptr<core::ProcessContext>& 
context, const std::shared_ptr<core::ProcessSession>& session) {
+  gsl_Expects(context && session && gcp_credentials_);
+
+  auto flow_file = session->get();
+  if (!flow_file) {
+    context->yield();
+    return;
+  }
+
+  auto bucket = context->getProperty(Bucket, flow_file);
+  if (!bucket || bucket->empty()) {
+    logger_->log_error("Missing bucket name");
+    session->transfer(flow_file, Failure);
+    return;
+  }
+  auto object_name = context->getProperty(Key, flow_file);
+  if (!object_name || object_name->empty()) {
+    logger_->log_error("Missing object name");
+    session->transfer(flow_file, Failure);
+    return;
+  }
+
+  gcs::Generation generation;
+  auto generation_str = context->getProperty(Generation, flow_file);
+  if (generation_str) {
+    try {
+      generation = gcs::Generation(std::stol(*generation_str));
+    } catch (const std::invalid_argument&) {
+      logger_->log_error("Invalid generation %s", *generation_str);

Review Comment:
   Is a "transfer to failure; return" missing here?



##########
PROCESSORS.md:
##########
@@ -390,6 +393,42 @@ In the list below, the names of required properties appear 
in bold. Any other pr
 |failure|If file deletion from Azure Data Lake storage fails the flowfile is 
transferred to this relationship|
 |success|If file deletion from Azure Data Lake storage succeeds the flowfile 
is transferred to this relationship|
 
+## DeleteGCSObject
+
+### Description
+
+Deletes an object from a Google Cloud Bucket.
+
+### Properties
+
+In the list below, the names of required properties appear in bold. Any other 
properties (not in bold) are considered optional. The table also indicates any 
default values, and whether a property supports the NiFi Expression Language.
+
+| Name                                 | Default Value | Allowable Values      
                                                            | Description       
                                                                                
                               |
+|--------------------------------------|---------------|-----------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------|
+| **Bucket**                           | ${gcs.bucket} |                       
                                                            | Bucket of the 
object.<br>**Supports Expression Language: true**                               
                                   |
+| **Key**                              | ${filename}   |                       
                                                            | Name of the 
object.<br>**Supports Expression Language: true**                               
                                     |
+| **Number Of retries**                | 6             | integers              
                                                            | How many retry 
attempts should be made before routing to the failure relationship.             
                                  |
+| **GCP Credentials Provider Service** |               | 
[GCPCredentialsControllerService](CONTROLLERS.md#GCPCredentialsControllerService)
 | The Controller Service used to obtain Google Cloud Platform credentials.     
                                                    |
+| Server Side Encryption Key           |               |                       
                                                            | An AES256 
Encryption Key (encoded in base64) for server-side encryption of the 
object.<br>**Supports Expression Language: true** |
+| Generation                           |               |                       
                                                            | The generation of 
the Object to download. If null, will download latest generation.               
                               |

Review Comment:
   "null" is not clear to me, as it could either mean "empty or missing" or "is 
the string 'null'".  I would change it to "empty or missing".  (Also in the 
documentation of the other two processors.)



##########
extensions/gcp/processors/DeleteGCSObject.cpp:
##########
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DeleteGCSObject.h"
+
+#include "core/Resource.h"
+#include "core/ProcessContext.h"
+#include "core/ProcessSession.h"
+#include "core/FlowFile.h"
+#include "utils/OptionalUtils.h"
+#include "../GCPAttributes.h"
+
+namespace gcs = ::google::cloud::storage;
+
+namespace org::apache::nifi::minifi::extensions::gcp {
+const core::Property DeleteGCSObject::Bucket(
+    core::PropertyBuilder::createProperty("Bucket")
+        ->withDescription("Bucket of the object.")
+        ->withDefaultValue("${gcs.bucket}")
+        ->supportsExpressionLanguage(true)
+        ->build());
+
+const core::Property DeleteGCSObject::Key(
+    core::PropertyBuilder::createProperty("Key")
+        ->withDescription("Name of the object.")
+        ->withDefaultValue("${filename}")
+        ->supportsExpressionLanguage(true)
+        ->build());
+
+const core::Property DeleteGCSObject::Generation(
+    core::PropertyBuilder::createProperty("Object Generation")
+        ->withDescription("The generation of the object to be deleted. If 
null, will use latest version of the object.")
+        ->supportsExpressionLanguage(false)
+        ->build());
+
+const core::Property DeleteGCSObject::EncryptionKey(
+    core::PropertyBuilder::createProperty("Server Side Encryption Key")
+        ->withDescription("The AES256 Encryption Key (encoded in base64) for 
server-side decryption of the object.")
+        ->isRequired(false)
+        ->supportsExpressionLanguage(true)
+        ->build());
+
+const core::Relationship DeleteGCSObject::Success("success", "FlowFiles are 
routed to this relationship after a successful Google Cloud Storage 
operation.");
+const core::Relationship DeleteGCSObject::Failure("failure", "FlowFiles are 
routed to this relationship if the Google Cloud Storage operation fails.");
+
+void DeleteGCSObject::initialize() {
+  setSupportedProperties({GCPCredentials,
+                          Bucket,
+                          Key,
+                          Generation,
+                          NumberOfRetries,
+                          EncryptionKey,
+                          EndpointOverrideURL});
+  setSupportedRelationships({Success, Failure});
+}
+
+void DeleteGCSObject::onSchedule(const std::shared_ptr<core::ProcessContext>& 
context, const std::shared_ptr<core::ProcessSessionFactory>& session_factory) {
+  GCSProcessor::onSchedule(context, session_factory);
+}
+
+void DeleteGCSObject::onTrigger(const std::shared_ptr<core::ProcessContext>& 
context, const std::shared_ptr<core::ProcessSession>& session) {
+  gsl_Expects(context && session && gcp_credentials_);
+
+  auto flow_file = session->get();
+  if (!flow_file) {
+    context->yield();
+    return;
+  }
+
+  auto bucket = context->getProperty(Bucket, flow_file);
+  if (!bucket || bucket->empty()) {
+    logger_->log_error("Missing bucket name");
+    session->transfer(flow_file, Failure);
+    return;
+  }
+  auto object_name = context->getProperty(Key, flow_file);
+  if (!object_name || object_name->empty()) {
+    logger_->log_error("Missing object name");
+    session->transfer(flow_file, Failure);
+    return;
+  }
+
+  gcs::Generation generation;
+  auto generation_str = context->getProperty(Generation, flow_file);
+  if (generation_str) {
+    try {
+      generation = gcs::Generation(std::stol(*generation_str));
+    } catch (const std::invalid_argument&) {
+      logger_->log_error("Invalid generation %s", *generation_str);
+    }
+  }
+
+  auto status = getClient().DeleteObject(*bucket, *object_name, generation, 
gcs::IfGenerationNotMatch(0));
+
+  if (!status.ok()) {
+    flow_file->setAttribute(GCS_STATUS_MESSAGE, status.message());
+    flow_file->setAttribute(GCS_ERROR_REASON, status.error_info().reason());
+    flow_file->setAttribute(GCS_ERROR_DOMAIN, status.error_info().domain());
+    logger_->log_error("Failed to delete from Google Cloud Storage %s %s", 
status.message(), status.error_info().reason());

Review Comment:
   it could be useful to log `bucket` and `object_name`, too



##########
extensions/gcp/processors/ListGCSBucket.cpp:
##########
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ListGCSBucket.h"
+
+#include "core/Resource.h"
+#include "core/FlowFile.h"
+#include "core/ProcessContext.h"
+#include "core/ProcessSession.h"
+#include "utils/OptionalUtils.h"
+#include "../GCPAttributes.h"
+
+namespace gcs = ::google::cloud::storage;
+
+namespace org::apache::nifi::minifi::extensions::gcp {
+const core::Property ListGCSBucket::Bucket(
+    core::PropertyBuilder::createProperty("Bucket")
+        ->withDescription("Bucket of the object.")
+        ->isRequired(true)
+        ->supportsExpressionLanguage(true)
+        ->build());
+
+const core::Property ListGCSBucket::UseVersions(
+    core::PropertyBuilder::createProperty("List all versions")
+        ->withDescription("Set this option to `true` to get all the previous 
versions separately.")
+        ->withDefaultValue<bool>(false)
+        ->build());
+
+const core::Relationship ListGCSBucket::Success("success", "FlowFiles are 
routed to this relationship after a successful Google Cloud Storage 
operation.");
+
+void ListGCSBucket::initialize() {
+  setSupportedProperties({GCPCredentials,
+                          Bucket,
+                          NumberOfRetries,
+                          EndpointOverrideURL,
+                          UseVersions});
+  setSupportedRelationships({Success});
+}
+
+
+void ListGCSBucket::onSchedule(const std::shared_ptr<core::ProcessContext>& 
context, const std::shared_ptr<core::ProcessSessionFactory>& session_factory) {
+  GCSProcessor::onSchedule(context, session_factory);
+  gsl_Expects(context);
+  context->getProperty(Bucket.getName(), bucket_);
+}
+
+void ListGCSBucket::onTrigger(const std::shared_ptr<core::ProcessContext>& 
context, const std::shared_ptr<core::ProcessSession>& session) {
+  gsl_Expects(context && session && gcp_credentials_);
+
+  gcs::Client client = getClient();
+  auto use_generations = context->getProperty<bool>(UseVersions);
+  gcs::Versions versions = (use_generations && *use_generations) ? 
gcs::Versions(true) : gcs::Versions(false);
+  auto objects_in_bucket = client.ListObjects(bucket_, versions);
+  for (auto& object_in_bucket : objects_in_bucket) {

Review Comment:
   could this be `const auto&`?



##########
PROCESSORS.md:
##########
@@ -970,6 +1046,59 @@ In the list below, the names of required properties 
appear in bold. Any other pr
 |success|All FlowFiles that are received are routed to success|
 
 
+## ListGCSBucket
+
+### Description
+
+Retrieves a listing of objects from an GCS bucket.
+For each object that is listed, creates a FlowFile that represents the object 
so that it can be fetched or deleted in conjunction with 
FetchGCSObject/DeleteGCSObject.
+
+### Properties
+
+In the list below, the names of required properties appear in bold. Any other 
properties (not in bold) are considered optional. The table also indicates any 
default values, and whether a property supports the NiFi Expression Language.
+
+| Name                                 | Default Value | Allowable Values      
                                                            | Description       
                                                                 |
+|--------------------------------------|---------------|-----------------------------------------------------------------------------------|------------------------------------------------------------------------------------|
+| **Bucket**                           |               |                       
                                                            | Bucket of the 
object.<br>**Supports Expression Language: true**                    |
+| **Number Of retries**                | 6             | integers              
                                                            | How many retry 
attempts should be made before routing to the failure relationship. |
+| **GCP Credentials Provider Service** |               | 
[GCPCredentialsControllerService](CONTROLLERS.md#GCPCredentialsControllerService)
 | The Controller Service used to obtain Google Cloud Platform credentials.     
      |
+| Endpoint Override URL                |               |                       
                                                            | Overrides the 
default Google Cloud Storage endpoints                               |
+| List all versions                    | false         | false<br>true         
                                                            | Set this option 
to `true` to get all the previous versions separately.             |
+
+
+### Relationships
+
+| Name    | Description                                                        
                           |
+|---------|-----------------------------------------------------------------------------------------------|
+| success | FlowFiles are routed to this relationship after a successful 
Google Cloud Storage operation.  |
+
+### Output Attributes
+
+| Attribute                  | Relationship | Description                      
                                  |
+|----------------------------|--------------|--------------------------------------------------------------------|
+| _gcs.bucket_               | success      | Bucket of the object.            
                                  |
+| _gcs.key, filename_        | success      | Name of the object.              
                                  |
+| _gcs.size_                 | success      | Size of the object.              
                                  |
+| _gcs.crc32c_               | success      | The CRC32C checksum of object's 
data, encoded in base64            |
+| _gcs.md5_                  | success      | The MD5 hash of the object's 
data encoded in base64.               |
+| _gcs.owner.entity_         | success      | The owner entity, in the form 
"user-emailAddress".                 |
+| _gcs.owner.entity.id_      | success      | The ID for the entity.           
                                  |
+| _gcs.content.encoding_     | success      | The content encoding of the 
object.                                |
+| _gcs.content.language_     | success      | The content language of the 
object.                                |
+| _gcs.content.disposition_  | success      | The data content disposition of 
the object.                        |
+| _gcs.media.link_           | success      | The media download link to the 
object.                             |
+| _gcs.self.link_            | success      | The link to this object.         
                                  |
+| _gcs.etag_                 | success      | The HTTP 1.1 Entity tag for the 
object.                            |
+| _gcs.generated.id_         | success      | The service-generated ID for the 
object                            |
+| _gcs.generation_           | success      | The content generation of this 
object. Used for object versioning. |
+| _gcs.metageneration_       | success      | The metageneration of the 
object.                                  |
+| _gcs.create.time_          | success      | The creation time of the object 
(milliseconds)                     |
+| _gcs.update.time_          | success      | The last modification time of 
the object (milliseconds)            |
+| _gcs.delete.time_          | success      | The deletion time of the object 
(milliseconds)                     |

Review Comment:
   To me, "milliseconds" is a good explanation for a duration, but not for a 
timestamp (number of milliseconds since when?)  I would change them to "Unix 
timestamp in milliseconds".



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@nifi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to