This is an automated email from the ASF dual-hosted git repository. szaszm pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git
commit 24d30aa0eebdd423ed2c66153f13a6cf76a2e632 Author: Gabor Gyimesi <gamezb...@gmail.com> AuthorDate: Thu Apr 11 18:40:19 2024 +0200 MINIFICPP-2293 Support installing python dependencies defined inline Closes #1727 Signed-off-by: Marton Szasz <sza...@apache.org> --- docker/test/integration/cluster/ContainerStore.py | 3 ++ .../test/integration/cluster/DockerTestCluster.py | 3 ++ docker/test/integration/cluster/ImageStore.py | 37 ++++++++++++++-- .../cluster/containers/MinifiContainer.py | 7 ++- .../features/MiNiFi_integration_test_driver.py | 3 ++ docker/test/integration/features/python.feature | 1 + docker/test/integration/features/steps/steps.py | 2 + extensions/python/ExecutePythonProcessor.cpp | 1 + extensions/python/PYTHON.md | 7 ++- extensions/python/PythonCreator.h | 7 +++ extensions/python/PythonDependencyInstaller.cpp | 42 +++++++++++++----- extensions/python/PythonDependencyInstaller.h | 2 + .../utils/inline_dependency_installer.py | 51 ++++++++++++++++++++++ msi/WixWin.wsi.in | 6 +++ 14 files changed, 154 insertions(+), 18 deletions(-) diff --git a/docker/test/integration/cluster/ContainerStore.py b/docker/test/integration/cluster/ContainerStore.py index 0dad14fe0..8b17ee6ba 100644 --- a/docker/test/integration/cluster/ContainerStore.py +++ b/docker/test/integration/cluster/ContainerStore.py @@ -358,6 +358,9 @@ class ContainerStore: def use_nifi_python_processors_with_virtualenv_packages_installed_in_minifi(self): self.minifi_options.use_nifi_python_processors_with_virtualenv_packages_installed = True + def remove_python_requirements_txt_in_minifi(self): + self.minifi_options.remove_python_requirements_txt = True + def set_yaml_in_minifi(self): self.minifi_options.config_format = "yaml" diff --git a/docker/test/integration/cluster/DockerTestCluster.py b/docker/test/integration/cluster/DockerTestCluster.py index 4114b3f6d..5451f67b6 100644 --- a/docker/test/integration/cluster/DockerTestCluster.py +++ b/docker/test/integration/cluster/DockerTestCluster.py @@ -116,6 +116,9 @@ class DockerTestCluster: def use_nifi_python_processors_with_virtualenv_packages_installed_in_minifi(self): self.container_store.use_nifi_python_processors_with_virtualenv_packages_installed_in_minifi() + def remove_python_requirements_txt_in_minifi(self): + self.container_store.remove_python_requirements_txt_in_minifi() + def set_yaml_in_minifi(self): self.container_store.set_yaml_in_minifi() diff --git a/docker/test/integration/cluster/ImageStore.py b/docker/test/integration/cluster/ImageStore.py index bddc9e417..7a6e6cad6 100644 --- a/docker/test/integration/cluster/ImageStore.py +++ b/docker/test/integration/cluster/ImageStore.py @@ -24,6 +24,12 @@ from textwrap import dedent import os +class PythonOptions: + REQUIREMENTS_FILE = 0 + SYSTEM_INSTALLED_PACKAGES = 1 + INLINE_DEFINED_PACKAGES = 2 + + class ImageStore: def __init__(self): self.client = docker.from_env() @@ -43,9 +49,11 @@ class ImageStore: if container_engine == "minifi-cpp-sql": image = self.__build_minifi_cpp_sql_image() elif container_engine == "minifi-cpp-nifi-python": - image = self.__build_minifi_cpp_image_with_nifi_python_processors() + image = self.__build_minifi_cpp_image_with_nifi_python_processors(PythonOptions.REQUIREMENTS_FILE) elif container_engine == "minifi-cpp-nifi-python-system-python-packages": - image = self.__build_minifi_cpp_image_with_nifi_python_processors('RUN pip3 install "langchain<=0.17.0"') + image = self.__build_minifi_cpp_image_with_nifi_python_processors(PythonOptions.SYSTEM_INSTALLED_PACKAGES) + elif container_engine == "minifi-cpp-nifi-with-inline-python-dependencies": + image = self.__build_minifi_cpp_image_with_nifi_python_processors(PythonOptions.INLINE_DEFINED_PACKAGES) elif container_engine == "http-proxy": image = self.__build_http_proxy_image() elif container_engine == "postgresql-server": @@ -97,10 +105,25 @@ class ImageStore: return self.__build_image(dockerfile) - def __build_minifi_cpp_image_with_nifi_python_processors(self, additional_cmd=""): + def __build_minifi_cpp_image_with_nifi_python_processors(self, python_option): parse_document_url = "https://raw.githubusercontent.com/apache/nifi/rel/nifi-" + NifiContainer.NIFI_VERSION + "/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/ParseDocument.py" chunk_document_url = "https://raw.githubusercontent.com/apache/nifi/rel/nifi-" + NifiContainer.NIFI_VERSION + "/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/ChunkDocument.py" pip3_install_command = "" + requirements_install_command = "" + additional_cmd = "" + # The following sed command is used to remove the existing dependencies from the ParseDocument and ChunkDocument processors + # /class ProcessorDetails:/,/^$/: Do the following between 'class ProcessorDetails:' and the first empty line (so we don't modify other PropertyDescriptor blocks below) + # /^\s*dependencies\s*=/,/\]\s*$/: Do the following between 'dependencies =' at the start of a line, and ']' at the end of a line + # d: Delete line + parse_document_sed_cmd = 'sed -i "/class ProcessorDetails:/,/^$/{/^\\s*dependencies\\s*=/,/\\]\\s*$/d}" /opt/minifi/minifi-current/minifi-python/nifi_python_processors/ParseDocument.py && \\' + chunk_document_sed_cmd = 'sed -i "/class ProcessorDetails:/,/^$/{/^\\s*dependencies\\s*=/,/\\]\\s*$/d}" /opt/minifi/minifi-current/minifi-python/nifi_python_processors/ChunkDocument.py && \\' + if python_option == PythonOptions.SYSTEM_INSTALLED_PACKAGES: + additional_cmd = "RUN pip3 install 'langchain<=0.17.0'" + elif python_option == PythonOptions.REQUIREMENTS_FILE: + requirements_install_command = "echo 'langchain<=0.17.0' > /opt/minifi/minifi-current/minifi-python/nifi_python_processors/requirements.txt && \\" + elif python_option == PythonOptions.INLINE_DEFINED_PACKAGES: + parse_document_sed_cmd = parse_document_sed_cmd[:-2] + ' sed -i "54 i \\ \\ \\ \\ \\ \\ \\ \\ dependencies = [\\\"langchain<=0.17.0\\\"]" /opt/minifi/minifi-current/minifi-python/nifi_python_processors/ParseDocument.py && \\' + chunk_document_sed_cmd = 'sed -i "s/\\[\\\'langchain\\\'\\]/\\[\\\'langchain<=0.17.0\\\'\\]/" /opt/minifi/minifi-current/minifi-python/nifi_python_processors/ChunkDocument.py && \\' if not MinifiContainer.MINIFI_TAG_PREFIX: pip3_install_command = "RUN apk --update --no-cache add py3-pip" dockerfile = dedent("""\ @@ -113,6 +136,9 @@ class ImageStore: RUN wget {parse_document_url} --directory-prefix=/opt/minifi/minifi-current/minifi-python/nifi_python_processors && \\ wget {chunk_document_url} --directory-prefix=/opt/minifi/minifi-current/minifi-python/nifi_python_processors && \\ echo 'langchain<=0.17.0' > /opt/minifi/minifi-current/minifi-python/nifi_python_processors/requirements.txt && \\ + {requirements_install_command} + {parse_document_sed_cmd} + {chunk_document_sed_cmd} python3 -m venv /opt/minifi/minifi-current/venv && \\ python3 -m venv /opt/minifi/minifi-current/venv-with-langchain && \\ . /opt/minifi/minifi-current/venv-with-langchain/bin/activate && python3 -m pip install --no-cache-dir "langchain<=0.17.0" && \\ @@ -121,7 +147,10 @@ class ImageStore: pip3_install_command=pip3_install_command, parse_document_url=parse_document_url, chunk_document_url=chunk_document_url, - additional_cmd=additional_cmd)) + additional_cmd=additional_cmd, + requirements_install_command=requirements_install_command, + parse_document_sed_cmd=parse_document_sed_cmd, + chunk_document_sed_cmd=chunk_document_sed_cmd)) return self.__build_image(dockerfile, [os.path.join(self.test_dir, "resources", "python", "RotatingForwarder.py")]) diff --git a/docker/test/integration/cluster/containers/MinifiContainer.py b/docker/test/integration/cluster/containers/MinifiContainer.py index 73c14c7bf..6d6e8a6f5 100644 --- a/docker/test/integration/cluster/containers/MinifiContainer.py +++ b/docker/test/integration/cluster/containers/MinifiContainer.py @@ -36,6 +36,7 @@ class MinifiOptions: self.use_nifi_python_processors_with_system_python_packages_installed = False self.use_nifi_python_processors_with_virtualenv = False self.use_nifi_python_processors_with_virtualenv_packages_installed = False + self.remove_python_requirements_txt = False self.config_format = "json" self.use_flow_config_from_url = False self.set_ssl_context_properties = False @@ -150,12 +151,12 @@ class MinifiContainer(FlowContainer): f.write("controller.socket.port=9998\n") f.write("controller.socket.local.any.interface=false\n") - if self.options.use_nifi_python_processors_with_virtualenv: + if self.options.use_nifi_python_processors_with_virtualenv or self.options.remove_python_requirements_txt: f.write("nifi.python.virtualenv.directory=/opt/minifi/minifi-current/venv\n") elif self.options.use_nifi_python_processors_with_virtualenv_packages_installed: f.write("nifi.python.virtualenv.directory=/opt/minifi/minifi-current/venv-with-langchain\n") - if self.options.use_nifi_python_processors_with_virtualenv: + if self.options.use_nifi_python_processors_with_virtualenv or self.options.remove_python_requirements_txt: f.write("nifi.python.install.packages.automatically=true\n") def _setup_config(self): @@ -180,6 +181,8 @@ class MinifiContainer(FlowContainer): image = self.image_store.get_image('minifi-cpp-nifi-python-system-python-packages') elif self.options.use_nifi_python_processors_with_virtualenv or self.options.use_nifi_python_processors_with_virtualenv_packages_installed: image = self.image_store.get_image('minifi-cpp-nifi-python') + elif self.options.remove_python_requirements_txt: + image = self.image_store.get_image('minifi-cpp-nifi-with-inline-python-dependencies') else: image = 'apacheminificpp:' + MinifiContainer.MINIFI_TAG_PREFIX + MinifiContainer.MINIFI_VERSION diff --git a/docker/test/integration/features/MiNiFi_integration_test_driver.py b/docker/test/integration/features/MiNiFi_integration_test_driver.py index 35fd97bcd..383d751a9 100644 --- a/docker/test/integration/features/MiNiFi_integration_test_driver.py +++ b/docker/test/integration/features/MiNiFi_integration_test_driver.py @@ -432,6 +432,9 @@ class MiNiFi_integration_test: def use_nifi_python_processors_with_virtualenv_packages_installed_in_minifi(self): self.cluster.use_nifi_python_processors_with_virtualenv_packages_installed_in_minifi() + def remove_python_requirements_txt_in_minifi(self): + self.cluster.remove_python_requirements_txt_in_minifi() + def set_yaml_in_minifi(self): self.cluster.set_yaml_in_minifi() diff --git a/docker/test/integration/features/python.feature b/docker/test/integration/features/python.feature index c4ad12922..877e0d65a 100644 --- a/docker/test/integration/features/python.feature +++ b/docker/test/integration/features/python.feature @@ -89,6 +89,7 @@ Feature: MiNiFi can use python processors in its flows | with required python packages | | with a pre-created virtualenv | | with a pre-created virtualenv containing the required python packages | + | using inline defined Python dependencies to install packages | @USE_NIFI_PYTHON_PROCESSORS Scenario: MiNiFi C++ can use custom relationships in NiFi native python processors diff --git a/docker/test/integration/features/steps/steps.py b/docker/test/integration/features/steps/steps.py index 3bc9e781c..578a7742e 100644 --- a/docker/test/integration/features/steps/steps.py +++ b/docker/test/integration/features/steps/steps.py @@ -1284,5 +1284,7 @@ def step_impl(context, install_mode): context.test.use_nifi_python_processors_with_virtualenv_in_minifi() elif install_mode == "with a pre-created virtualenv containing the required python packages": context.test.use_nifi_python_processors_with_virtualenv_packages_installed_in_minifi() + elif install_mode == "using inline defined Python dependencies to install packages": + context.test.remove_python_requirements_txt_in_minifi() else: raise Exception("Unknown python install mode.") diff --git a/extensions/python/ExecutePythonProcessor.cpp b/extensions/python/ExecutePythonProcessor.cpp index 9f9743df7..a98d446d1 100644 --- a/extensions/python/ExecutePythonProcessor.cpp +++ b/extensions/python/ExecutePythonProcessor.cpp @@ -20,6 +20,7 @@ #include <utility> #include "ExecutePythonProcessor.h" +#include "PythonConfigState.h" #include "types/PyRelationship.h" #include "types/PyLogger.h" diff --git a/extensions/python/PYTHON.md b/extensions/python/PYTHON.md index 52d7031a5..c38b2055d 100644 --- a/extensions/python/PYTHON.md +++ b/extensions/python/PYTHON.md @@ -164,7 +164,6 @@ Due to some differences between the NiFi and MiNiFi C++ processors and implement - MiNiFi C++ only supports expression language with flow file attributes, so only FLOWFILE_ATTRIBUTES expression language scope is supported, otherwise the expression language will not be evaluated. - MiNiFi C++ does not support property dependencies, so the property dependencies will be ignored. If a property depends on another property, the property will not be required. - MiNiFi C++ does not support the use of self.jvm member in Python processors that provides JVM bindings in NiFi, it is set to None in MiNiFi C++. -- Inline definition of Python package dependencies, defined in the ProcessorDetails nested class are not supported as in NiFi, so the dependencies must be defined in the requirements.txt files. If a processor's dependencies are defined in the ProcessorDetails class, the dependencies should be copied to the requirements.txt file. ## Use Python processors from virtualenv @@ -182,6 +181,12 @@ It is possible to automatically install the dependencies of the Python processor # in minifi.properties nifi.python.install.packages.automatically=true +Additionally if the `nifi.python.install.packages.automatically` is set to true, the dependencies defined inline in the Python processors are also installed. These dependencies should be defined in the processors' `ProcessorDetails` nested class' `dependencies` attribute, which must be a list of strings containing the package names and optionally their required versions. + + class DetectObjectInImage(FlowFileTransform): + class ProcessorDetails: + dependencies = ['numpy >= 1.23.5', 'opencv-python >= 4.6'] + ## Set python binary for virtualenv creation and package installation By default the `python3` command is used on Unix systems and `python` command is used on Windows to create virtualenvs and call the `pip` command for installing Python packages. This can be changed using the `nifi.python.env.setup.binary` property to use a different python command or a specific python binary path. diff --git a/extensions/python/PythonCreator.h b/extensions/python/PythonCreator.h index 94a6131f6..3dc96d0f2 100644 --- a/extensions/python/PythonCreator.h +++ b/extensions/python/PythonCreator.h @@ -28,6 +28,7 @@ #include "core/logging/LoggerConfiguration.h" #include "core/Resource.h" #include "ExecutePythonProcessor.h" +#include "PythonConfigState.h" #include "PythonObjectFactory.h" #include "agent/agent_version.h" #include "agent/build_description.h" @@ -38,6 +39,7 @@ #include "utils/file/FilePattern.h" #include "range/v3/view/filter.hpp" #include "PythonDependencyInstaller.h" +#include "utils/file/PathUtils.h" namespace org::apache::nifi::minifi::extensions::python { @@ -78,6 +80,11 @@ class PythonCreator : public minifi::core::CoreComponent { class_name = full_name; } if (path.string().find("nifi_python_processors") != std::string::npos) { + auto utils_path = (std::filesystem::path("nifi_python_processors") / "utils").string(); + if (path.string().find(utils_path) != std::string::npos) { + continue; + } + dependency_installer.installInlinePythonDependencies(path); logger_->log_info("Registering NiFi python processor: {}", class_name); core::getClassLoader().registerClass(class_name, std::make_unique<PythonObjectFactory>(path.string(), script_name.string(), PythonProcessorType::NIFI_TYPE, std::vector<std::filesystem::path>{python_lib_path, std::filesystem::path{pathListings.value()}, path.parent_path()})); diff --git a/extensions/python/PythonDependencyInstaller.cpp b/extensions/python/PythonDependencyInstaller.cpp index f98e47bb1..b8239e54f 100644 --- a/extensions/python/PythonDependencyInstaller.cpp +++ b/extensions/python/PythonDependencyInstaller.cpp @@ -100,6 +100,20 @@ void PythonDependencyInstaller::createVirtualEnvIfSpecified() const { } } +void PythonDependencyInstaller::runInstallCommandInVirtualenv(const std::string& install_command) const { + std::string command_with_virtualenv; +#if WIN32 + command_with_virtualenv.append("\"").append((virtualenv_path_ / "Scripts" / "activate.bat").string()).append("\" && "); +#else + command_with_virtualenv.append(". \"").append((virtualenv_path_ / "bin" / "activate").string()).append("\" && "); +#endif + command_with_virtualenv.append(install_command); + auto return_value = std::system(encapsulateCommandInQuotesIfNeeded(command_with_virtualenv).c_str()); + if (return_value != 0) { + throw PythonScriptException(fmt::format("The following command to install python packages failed: '{}'", command_with_virtualenv)); + } +} + void PythonDependencyInstaller::installDependenciesFromRequirementsFiles() const { if (!isPackageInstallationNeeded()) { return; @@ -107,17 +121,9 @@ void PythonDependencyInstaller::installDependenciesFromRequirementsFiles() const auto requirement_file_paths = getRequirementsFilePaths(); for (const auto& requirements_file_path : requirement_file_paths) { logger_->log_info("Installing python packages from the following requirements.txt file: {}", requirements_file_path.string()); - std::string pip_command; -#if WIN32 - pip_command.append("\"").append((virtualenv_path_ / "Scripts" / "activate.bat").string()).append("\" && "); -#else - pip_command.append(". \"").append((virtualenv_path_ / "bin" / "activate").string()).append("\" && "); -#endif - pip_command.append("\"").append(python_binary_).append("\" -m pip install --no-cache-dir -r \"").append(requirements_file_path.string()).append("\""); - auto return_value = std::system(encapsulateCommandInQuotesIfNeeded(pip_command).c_str()); - if (return_value != 0) { - throw PythonScriptException(fmt::format("The following command to install python packages failed: '{}'", pip_command)); - } + // --no-cache-dir is used to be in line with NiFi's dependency install behavior + auto install_command = std::string("\"").append(python_binary_).append("\" -m pip install --no-cache-dir -r \"").append(requirements_file_path.string()).append("\""); + runInstallCommandInVirtualenv(install_command); } } @@ -165,4 +171,18 @@ void PythonDependencyInstaller::addVirtualenvToPath() const { } } +void PythonDependencyInstaller::installInlinePythonDependencies(const std::filesystem::path& script_file_path) const { + if (!isPackageInstallationNeeded()) { + return; + } + auto dependency_installer_path = python_processor_dir_ / "nifi_python_processors" / "utils" / "inline_dependency_installer.py"; + if (python_processor_dir_.empty() || !std::filesystem::exists(dependency_installer_path) || !std::filesystem::exists(script_file_path)) { + return; + } + logger_->log_info("Checking and installing inline defined Python dependencies of {}", script_file_path.string()); + auto install_command = std::string("\"").append(python_binary_).append("\" \"").append(dependency_installer_path.string()) + .append("\" \"").append(script_file_path.string()).append("\""); + runInstallCommandInVirtualenv(install_command); +} + } // namespace org::apache::nifi::minifi::extensions::python diff --git a/extensions/python/PythonDependencyInstaller.h b/extensions/python/PythonDependencyInstaller.h index 7cb271194..e74c47fa8 100644 --- a/extensions/python/PythonDependencyInstaller.h +++ b/extensions/python/PythonDependencyInstaller.h @@ -30,9 +30,11 @@ class PythonDependencyInstaller { public: explicit PythonDependencyInstaller(const std::shared_ptr<Configure> &configuration); void installDependenciesFromRequirementsFiles() const; + void installInlinePythonDependencies(const std::filesystem::path& script_file_path) const; private: std::vector<std::filesystem::path> getRequirementsFilePaths() const; + void runInstallCommandInVirtualenv(const std::string& install_command) const; void createVirtualEnvIfSpecified() const; static void evalScript(std::string_view script); void addVirtualenvToPath() const; diff --git a/extensions/python/pythonprocessors/nifi_python_processors/utils/inline_dependency_installer.py b/extensions/python/pythonprocessors/nifi_python_processors/utils/inline_dependency_installer.py new file mode 100644 index 000000000..565a5e120 --- /dev/null +++ b/extensions/python/pythonprocessors/nifi_python_processors/utils/inline_dependency_installer.py @@ -0,0 +1,51 @@ +import ast +import sys +import subprocess +import os + + +# Extract the list of PIP dependency packages from the visited processor class AST node +class Visitor(ast.NodeVisitor): + def __init__(self, class_name): + self.dependencies = [] + self.class_name = class_name + + def visit_ClassDef(self, node): + if node.name != self.class_name: + return + + # Iterate through the body of the class to find the ProcessorDetails nested class + for child in node.body: + if isinstance(child, ast.ClassDef) and child.name == 'ProcessorDetails': + # Iterate through the nodes of the 'ProcessorDetails' class + for detail in child.body: + # Check if the child node is an assignment of the 'dependencies' member variable + if isinstance(detail, ast.Assign) and detail.targets[0].id == 'dependencies': + # Iterate through values of the 'dependencies' list member variable + for elt in detail.value.elts: + # Check if the element is a string constant and add it to the dependencies list + if isinstance(elt, ast.Constant): + self.dependencies.append(elt.s) + break + break + + +def extract_dependencies(file_path): + class_name = file_path.split(os.sep)[-1].split('.')[0] + with open(file_path, 'r') as file: + code = file.read() + + tree = ast.parse(code) + visitor = Visitor(class_name) + visitor.visit(tree) + return visitor.dependencies + + +if __name__ == '__main__': + if len(sys.argv) < 2: + sys.exit(1) + + dependencies = extract_dependencies(sys.argv[1]) + if dependencies: + # --no-cache-dir is used to be in line with NiFi's dependency install behavior + subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-cache-dir"] + dependencies) diff --git a/msi/WixWin.wsi.in b/msi/WixWin.wsi.in index b50498084..0f8a422e7 100644 --- a/msi/WixWin.wsi.in +++ b/msi/WixWin.wsi.in @@ -67,6 +67,7 @@ ${WIX_EXTRA_FEATURES} <ComponentRef Id="PythonProcessorH2oFiles"/> <ComponentRef Id="PythonProcessorNifiApiFiles"/> <ComponentRef Id="PythonProcessorGoogleFiles"/> + <ComponentRef Id="NifiPythonProcessorUtilsFiles"/> </Feature> <FeatureRef Id="ProductFeature"/> @@ -437,6 +438,11 @@ ${WIX_EXTRA_COMPONENTS} <Component Id="NifiPythonProcessorFiles" Guid="66ce16d4-aea4-4fcb-bbbb-8a28813e2138"> <File Id="NifiPythonProcessors_init" Name="__init__.py" KeyPath="yes" Source="..\extensions\python\pythonprocessors\nifi_python_processors\__init__.py"/> </Component> + <Directory Id="INSTALLNIFIPYTHONPROCESSORSUTILSDIR" Name="utils"> + <Component Id="NifiPythonProcessorUtilsFiles" Guid="6bb11f67-007f-4467-970a-c42c2bae0891"> + <File Id="NifiPythonProcessorUtils_dependency_installer" Name="inline_dependency_installer.py" KeyPath="yes" Source="..\extensions\python\pythonprocessors\nifi_python_processors\utils\inline_dependency_installer.py"/> + </Component> + </Directory> </Directory> </Directory> </Directory>