dam4rus commented on code in PR #7003:
URL: https://github.com/apache/nifi/pull/7003#discussion_r1163915417


##########
nifi-nar-bundles/nifi-py4j-bundle/nifi-python-framework/src/main/python/framework/ExtensionManager.py:
##########
@@ -0,0 +1,531 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import importlib
+import sys
+import importlib.util  # Note requires Python 3.4+
+import inspect
+import logging
+import subprocess
+import ast
+import pkgutil
+from pathlib import Path
+
+logger = logging.getLogger("org.apache.nifi.py4j.ExtensionManager")
+
+# A simple wrapper class to encompass a processor type and its version
+class ExtensionId:
+    def __init__(self, classname=None, version=None):
+        self.classname = classname
+        self.version = version
+
+    def __hash__(self):
+        return hash((self.classname, self.version))
+
+    def __eq__(self, other):
+        return (self.classname, self.version) == (other.classname, 
other.version)
+
+
+class ExtensionDetails:
+    class Java:
+        implements = ['org.apache.nifi.python.PythonProcessorDetails']
+
+    def __init__(self, gateway, type, version='Unknown', dependencies=None, 
source_location=None, package_name=None, description=None, tags=None):
+        self.gateway = gateway
+        if dependencies is None:
+            dependencies = []
+        if tags is None:
+            tags = []
+
+        self.type = type
+        self.version = version
+        self.dependencies = dependencies
+        self.source_location = source_location
+        self.package_name = package_name
+        self.description = description
+        self.tags = tags
+
+    def getProcessorType(self):
+        return self.type
+
+    def getProcessorVersion(self):
+        return self.version
+
+    def getSourceLocation(self):
+        return self.source_location
+
+    def getPyPiPackageName(self):
+        return self.package_name
+
+    def getDependencies(self):
+        list = self.gateway.jvm.java.util.ArrayList()
+        for dep in self.dependencies:
+            list.add(dep)
+
+        return list
+
+    def getCapabilityDescription(self):
+        return self.description
+
+    def getTags(self):
+        list = self.gateway.jvm.java.util.ArrayList()
+        for tag in self.tags:
+            list.add(tag)
+
+        return list
+
+
+
+
+class ExtensionManager:
+    """
+    ExtensionManager is responsible for discovery of extensions types and the 
lifecycle management of those extension types.
+    Discovery of extension types includes finding what extension types are 
available
+    (e.g., which Processor types exist on the system), as well as information 
about those extension types, such as
+    the extension's documentation (tags and capability description).
+
+    Lifecycle management includes determining the third-party dependencies 
that an extension has and ensuring that those
+    third-party dependencies have been imported.
+    """
+
+    processorInterfaces = 
['org.apache.nifi.python.processor.FlowFileTransform', 
'org.apache.nifi.python.processor.RecordTransform']
+    processor_details = {}
+    processor_class_by_name = {}
+    module_files_by_extension_type = {}
+    dependency_directories = {}
+
+    def __init__(self, gateway):
+        self.gateway = gateway
+
+
+    def getProcessorTypes(self):
+        """
+        :return: a list of Processor types that have been discovered by the 
#discoverExtensions method
+        """
+        return self.processor_details.values()
+
+    def getProcessorClass(self, type, version, work_dir):
+        """
+        Returns the Python class that can be used to instantiate a processor 
of the given type.
+        Additionally, it ensures that the required third-party dependencies 
are on the system path in order to ensure that
+        the necessary libraries are available to the Processor so that it can 
be instantiated and used.
+
+        :param type: the type of Processor
+        :param version: the version of the Processor
+        :param work_dir: the working directory for extensions
+        :return: the Python class that can be used to instantiate a Processor 
of the given type and version
+
+        :raises ValueError: if there is no known Processor with the given type 
and version
+        """
+        id = ExtensionId(classname=type, version=version)
+        if id in self.processor_class_by_name:
+            return self.processor_class_by_name[id]
+
+        if id not in self.module_files_by_extension_type:
+            raise ValueError('Invalid Processor Type: No module is known to 
contain Processor of type ' + type + ' version ' + version)
+        module_file = self.module_files_by_extension_type[id]
+
+        if id in self.processor_details:
+            extension_working_dir = os.path.join(work_dir, 'extensions', type, 
version)
+            sys.path.insert(0, extension_working_dir)

Review Comment:
   To answer my own question in my 
[comment](https://github.com/apache/nifi/pull/7003#issuecomment-1471969300) , 
`$PYTHONPATH` is extended here. I don't think we should modify the 
`$PYTHONPATH` when we are already using virtual environments. As I've 
mentioned, all dependencies should go inside 
`$VIRTUAL_ENV/lib/$PYTHONVERSION/site-packages`. That way every Python script 
running in that specific virtual environment can use the dependencies. To 
achieve this, per-processor-type virtual environments should be created first and 
activated, and then `pip install` should be called to install the dependencies 
inside the virtual environment. The current implementation of dependency 
installation feels kinda 
backwards to how it's usually done. Is there any technical reason we are doing 
it this way?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@nifi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to