Copilot commented on code in PR #2186:
URL: https://github.com/apache/nifi-minifi-cpp/pull/2186#discussion_r3317807131
##########
minifi_rust/minifi_rs_behave/debian.dockerfile:
##########
@@ -0,0 +1,26 @@
+FROM rust:slim-bullseye AS chef
+RUN apt-get update && apt-get install -y clang lld pkg-config curl tar
+RUN cargo install cargo-chef
+WORKDIR /app
+
+FROM chef AS planner
+COPY . .
+RUN cargo chef prepare --recipe-path recipe.json
+
+FROM chef AS builder
+ARG MINIFI_SDK_PATH
+ENV MINIFI_SDK_PATH=${MINIFI_SDK_PATH}
+
+COPY --from=planner /app/recipe.json recipe.json
+
+# Conditionally copy the local SDK files from the target directory
+COPY target/.docker_sd[k] /app/target/.docker_sdk/
+COPY target/.docker_sdk.zi[p] /app/target/
+
+RUN cargo chef cook --release --recipe-path recipe.json
+COPY . .
+RUN cargo build --release
Review Comment:
The COPY source paths contain stray bracket characters
(`target/.docker_sd[k]`, `target/.docker_sdk.zi[p]`), which will fail the
Docker build because those paths will never exist. These should be the literal
`target/.docker_sdk` and `target/.docker_sdk.zip` paths produced by
linux_build.sh.
##########
minifi_rust/minifi_rs_behave/alpine.dockerfile:
##########
@@ -0,0 +1,27 @@
+FROM rust:alpine3.22 AS chef
+# Install build dependencies required for compiling C code & extensions on
Alpine
+RUN apk add --no-cache musl-dev gcc g++ clang-dev lld pkgconfig curl tar
+RUN cargo install cargo-chef
+WORKDIR /app
+
+FROM chef AS planner
+COPY . .
+RUN cargo chef prepare --recipe-path recipe.json
+
+FROM chef AS builder
+ARG MINIFI_SDK_PATH
+ENV MINIFI_SDK_PATH=${MINIFI_SDK_PATH}
+
+COPY --from=planner /app/recipe.json recipe.json
+
+# Conditionally copy the local SDK files from the target directory
+COPY target/.docker_sd[k] /app/target/.docker_sdk/
+COPY target/.docker_sdk.zi[p] /app/target/
+
+RUN cargo chef cook --release --recipe-path recipe.json
+COPY . .
+RUN cargo build --release
Review Comment:
The COPY source paths contain stray bracket characters
(`target/.docker_sd[k]`, `target/.docker_sdk.zi[p]`), which will fail the
Docker build because those paths will never exist. These should be the literal
`target/.docker_sdk` and `target/.docker_sdk.zip` paths produced by
linux_build.sh.
##########
minifi_rust/extensions/minifi_rs_playground/src/processors/put_file.rs:
##########
@@ -0,0 +1,239 @@
+use crate::processors::put_file::relationships::{FAILURE, SUCCESS};
+use minifi_native::macros::{ComponentIdentifier, DefaultMetrics,
NoAdvancedProcessorFeatures};
+use minifi_native::{
+ FlowFileTransform, GetAttribute, GetControllerService, GetProperty,
InputStream, Logger,
+ MinifiError, Schedule, TransformedFlowFile, trace, warn,
+};
+use std::path::{Path, PathBuf};
+use strum_macros::{Display, EnumString, IntoStaticStr, VariantNames};
+use walkdir::WalkDir;
+
+mod properties;
+mod relationships;
+#[cfg(unix)]
+mod unix_only_properties;
+
+#[derive(Debug, Clone, Copy, PartialEq, Display, EnumString, VariantNames,
IntoStaticStr)]
+#[strum(serialize_all = "camelCase", const_into_str)]
+enum ConflictResolutionStrategy {
+ Fail,
+ Replace,
+ Ignore,
+}
+
+#[cfg(unix)]
+#[derive(Debug)]
+struct PutFileUnixPermissions {
+ file_permissions: Option<std::fs::Permissions>,
+ directory_permissions: Option<std::fs::Permissions>,
+}
+
+#[cfg(unix)]
+impl PutFileUnixPermissions {
+ fn set_directory_permissions(&self, path: &Path) -> std::io::Result<()> {
+ if let Some(permissions) = self.directory_permissions.as_ref().map(|p|
p.clone()) {
+ return std::fs::set_permissions(path, permissions);
+ }
+ Ok(())
+ }
+
+ fn set_file_permissions(&self, file: &Path) -> std::io::Result<()> {
+ if let Some(permissions) = self.file_permissions.as_ref().map(|p|
p.clone()) {
+ return std::fs::set_permissions(file, permissions);
+ }
+ Ok(())
+ }
+}
+
+#[cfg(windows)]
+#[derive(Debug)]
+struct PutFileUnixPermissions {}
+
+#[cfg(windows)]
+impl PutFileUnixPermissions {
+ fn set_directory_permissions(&self, _path: &Path) -> std::io::Result<()> {
+ Ok(())
+ }
+
+ fn set_file_permissions(&self, _file: &Path) -> std::io::Result<()> {
+ Ok(())
+ }
+}
+
+#[derive(Debug, ComponentIdentifier, DefaultMetrics,
NoAdvancedProcessorFeatures)]
+pub(crate) struct PutFileRs {
+ conflict_resolution_strategy: ConflictResolutionStrategy,
+ try_make_dirs: bool,
+ maximum_file_count: Option<u64>,
+ unix_permissions: PutFileUnixPermissions,
+}
+
+impl PutFileRs {
+ pub(crate) fn directory_is_full(&self, p0: &Path) -> bool {
+ if let Some(max_file_count) = self.maximum_file_count
+ && let Some(parent) = p0.parent()
+ {
+ parent.exists()
+ && WalkDir::new(parent)
+ .into_iter()
+ .filter_map(Result::ok)
+ .filter(|e| e.file_type().is_file())
+ .count()
+ >= max_file_count as usize
+ } else {
+ false
+ }
+ }
+
+ fn get_destination_path<Ctx>(context: &Ctx) -> Result<PathBuf, MinifiError>
+ where
+ Ctx: GetProperty + GetAttribute,
+ {
+ let directory = context
+ .get_property(&properties::DIRECTORY)?
+ .expect("required property");
+
+ let file_name = context
+ .get_attribute("filename")?
+ .unwrap_or("foo.txt".to_string()); // TODO fallback to UUID
+ Ok(PathBuf::from(directory + "/" + file_name.as_str()))
+ }
Review Comment:
Destination path construction uses string concatenation (`directory + "/" +
filename`) and does not sanitize the `filename` attribute. This is not portable
(Windows separators) and allows path traversal (e.g. filename
`../../etc/passwd`) to escape the configured output directory. Build the path
with `PathBuf::from(directory).join(safe_filename)` and reject/strip any path
separators or `..` components (e.g. by using
`Path::new(&file_name).file_name()`).
##########
.github/workflows/ci.yml:
##########
@@ -578,8 +625,14 @@ jobs:
continue-on-error: true
run: ./run_flake8.sh .
+ - id: cargo_check
+ name: Cargo check
+ continue-on-error: true
+ run: cargo fmt --check
+ working-directory: minifi_rust
+
Review Comment:
The workflow step is labeled "Cargo check" but it runs `cargo fmt --check`.
This is misleading when diagnosing failures; either run `cargo check` here as
well, or rename the step/job to indicate it is a formatting check.
##########
cmake/MiNiFiOptions.cmake:
##########
@@ -116,6 +116,7 @@ add_minifi_option(ENABLE_EXECUTE_PROCESS "Enable
ExecuteProcess processor" OFF)
add_minifi_option(ENABLE_CONTROLLER "Enables the build of MiNiFi controller
binary." ON)
add_minifi_option(ENABLE_LLAMACPP "Enables llama.cpp support." ON)
add_minifi_option(ENABLE_OPC "Instructs the build system to enable the OPC
extension" ON)
+add_minifi_option(MINIFI_RUST "Enables the build of rust based extension." ON)
Review Comment:
`MINIFI_RUST` is defaulted to ON, which will make Rust tooling (and the
Corrosion FetchContent dependency) a hard requirement for a default CMake
configure/build. CI explicitly sets `-DMINIFI_RUST=OFF`, so it seems intended
to be optional; consider defaulting this option to OFF to avoid breaking
existing build environments that don't have Rust installed.
##########
minifi_rust/minifi_native/src/c_ffi/c_ffi_primitives.rs:
##########
@@ -0,0 +1,85 @@
+use crate::ProcessorInputRequirement;
+use minifi_native_sys::{
+ MinifiInputRequirement, MinifiInputRequirement_MINIFI_INPUT_ALLOWED,
+ MinifiInputRequirement_MINIFI_INPUT_FORBIDDEN,
MinifiInputRequirement_MINIFI_INPUT_REQUIRED,
+ MinifiStringView,
+};
+use std::os::raw::c_char;
+
+#[derive(Debug)]
+pub enum FfiConversionError {
+ NullPointer,
+ InvalidUtf8,
+}
+
+#[derive(Debug)]
+pub(crate) struct StringView<'a> {
+ inner: MinifiStringView,
+ _marker: std::marker::PhantomData<&'a ()>,
+}
+
+impl<'a> StringView<'a> {
+ pub(crate) fn new(str: &'a str) -> Self {
+ Self {
+ inner: MinifiStringView {
+ data: str.as_ptr() as *const c_char,
+ length: str.len(),
+ },
+ _marker: std::marker::PhantomData,
+ }
+ }
+
+ pub unsafe fn as_raw(&self) -> MinifiStringView {
+ self.inner
+ }
+}
+
+pub trait StaticStrAsMinifiCStr {
+ fn as_minifi_c_type(&self) -> MinifiStringView;
+}
+
+impl StaticStrAsMinifiCStr for &'static str {
+ fn as_minifi_c_type(&self) -> MinifiStringView {
+ MinifiStringView {
+ data: self.as_ptr() as *const c_char,
+ length: self.len(),
+ }
+ }
+}
+
+pub trait ConvertMinifiStringView {
+ unsafe fn as_string(&self) -> Result<String, FfiConversionError>;
+ unsafe fn as_str(&self) -> Result<&str, FfiConversionError>;
+}
+
+impl ConvertMinifiStringView for MinifiStringView {
+ unsafe fn as_string(&self) -> Result<String, FfiConversionError> {
+ if self.data.is_null() {
+ return Err(FfiConversionError::NullPointer);
+ }
+ unsafe {
+ let slice = std::slice::from_raw_parts(self.data as *const u8,
self.length);
+ String::from_utf8(slice.to_vec()).map_err(|_|
FfiConversionError::InvalidUtf8)
+ }
+ }
+
+ unsafe fn as_str(&self) -> Result<&str, FfiConversionError> {
+ if self.data.is_null() {
+ return Err(FfiConversionError::NullPointer);
+ }
+ unsafe {
+ let slice = std::slice::from_raw_parts(self.data as *const u8,
self.length);
+ str::from_utf8(slice).map_err(|_| FfiConversionError::InvalidUtf8)
+ }
Review Comment:
`str::from_utf8` is used without importing `std::str` (or qualifying it as
`std::str::from_utf8`), which will not compile. Either add `use std::str;` or
change the call to `std::str::from_utf8(slice)`.
##########
minifi_rust/minifi_rs_behave/build.rs:
##########
@@ -0,0 +1,6 @@
+fn main() {
+ println!("cargo:rerun-if-env-changed=DEP_MINIFI_BEHAVE_PATH");
+ let behave_path =
+ std::env::var("DEP_MINIFI_BEHAVE_PATH").expect("DEP_MINIFI_BEHAVE_PATH
is required");
+ println!("cargo:rustc-env=MINIFI_BEHAVE_PATH={}", behave_path);
+}
Review Comment:
This build script reads `DEP_MINIFI_BEHAVE_PATH`, but Cargo exposes
build-script metadata from the `minifi_native_sys` dependency under
`DEP_MINIFI_NATIVE_SYS_BEHAVE_PATH` (derived from `cargo:behave_path=...`). As
written, `MINIFI_BEHAVE_PATH` will never be set and the behave runner will fail
at runtime.
##########
minifi_rust/minifi_native/src/api/process_context.rs:
##########
@@ -0,0 +1,124 @@
+use crate::StandardPropertyValidator::*;
+use crate::api::RawControllerService;
+use crate::api::component_definition_traits::ComponentIdentifier;
+use crate::api::flow_file::FlowFile;
+use crate::api::property::GetControllerService;
+use crate::{EnableControllerService, GetProperty, MinifiError, Property};
+use std::str::FromStr;
+use std::time::Duration;
+
+pub trait ProcessContext {
+ type FlowFile: FlowFile;
+
+ fn get_property(
+ &self,
+ property: &Property,
+ flow_file: Option<&Self::FlowFile>,
+ ) -> Result<Option<String>, MinifiError>;
+
+ fn get_bool_property(
+ &self,
+ property: &Property,
+ flow_file: Option<&Self::FlowFile>,
+ ) -> Result<Option<bool>, MinifiError> {
+ if property.validator != BoolValidator {
+ return Err(MinifiError::validation_err(format!(
+ "to use get_bool_property {:?} must have BoolValidator",
+ property
+ )));
+ }
+
+ if let Some(property_val) = self.get_property(property, flow_file)? {
+ Ok(Some(bool::from_str(&property_val)?))
+ } else {
+ Ok(None)
+ }
+ }
+
+ fn get_duration_property(
+ &self,
+ property: &Property,
+ flow_file: Option<&Self::FlowFile>,
+ ) -> Result<Option<Duration>, MinifiError> {
+ if property.validator != TimePeriodValidator {
+ return Err(MinifiError::validation_err(format!(
+ "to use get_duration_property {:?} must have
TimePeriodValidator",
+ property
+ )));
+ }
+
+ if let Some(property_val) = self.get_property(property, flow_file)? {
+ Ok(Some(humantime::parse_duration(property_val.as_str())?))
+ } else {
+ Ok(None)
+ }
+ }
+
+ fn get_size_property(
+ &self,
+ property: &Property,
+ flow_file: Option<&Self::FlowFile>,
+ ) -> Result<Option<u64>, MinifiError> {
+ if property.validator != DataSizeValidator {
+ return Err(MinifiError::validation_err(format!(
+ "to use get_size_property {:?} must have DataSizeValidator",
+ property
+ )));
+ }
+ if let Some(property_val) = self.get_property(property, flow_file)? {
+ Ok(Some(byte_unit::Byte::from_str(&property_val)?.as_u64()))
+ } else {
+ Ok(None)
+ }
+ }
+
+ fn get_u64_property(
+ &self,
+ property: &Property,
+ flow_file: Option<&Self::FlowFile>,
+ ) -> Result<Option<u64>, MinifiError> {
+ if property.validator != U64Validator {
+ return Err(MinifiError::validation_err(format!(
+ "to use get_u64_property {:?} must have U64Validator",
+ property
+ )));
+ }
+ if let Some(property_val) = self.get_property(property, flow_file)? {
+ Ok(Some(u64::from_str(&property_val)?))
+ } else {
+ Ok(None)
+ }
+ }
+
+ fn get_raw_controller_service<Cs>(
+ &self,
+ property: &Property,
+ ) -> Result<Option<&Cs>, MinifiError>
+ where
+ Cs: RawControllerService + ComponentIdentifier + 'static;
+
+ fn get_controller_service<Cs>(&self, property: &Property) ->
Result<Option<&Cs>, MinifiError>
+ where
+ Cs: EnableControllerService + ComponentIdentifier + 'static;
+}
+
+impl<S> GetProperty for S
+where
+ S: ProcessContext,
+{
+ fn get_property(&self, property: &Property) -> Result<Option<String>,
MinifiError> {
+ self.get_property(property, None)
+ }
+}
+
+impl<S> GetControllerService for S
+where
+ S: ProcessContext,
+{
+ fn get_controller_service<Cs>(&self, property: &Property) ->
Result<Option<&Cs>, MinifiError>
+ where
+ Cs: EnableControllerService + ComponentIdentifier + 'static,
+ {
+ self.get_controller_service(property)
+ }
+}
Review Comment:
`GetControllerService for S` is implemented with
`self.get_controller_service(property)`, which resolves back to the same trait
method and causes unconditional recursion (stack overflow) instead of
delegating to `ProcessContext::get_controller_service`. Use UFCS to call the
`ProcessContext` method (e.g.
`ProcessContext::get_controller_service::<Cs>(self, property)`).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]