[GitHub] [airflow] turbaszek commented on a change in pull request #9531: Support .airflowignore for plugins

2020-07-03 Thread GitBox


turbaszek commented on a change in pull request #9531:
URL: https://github.com/apache/airflow/pull/9531#discussion_r449633646



##
File path: tests/plugins/test_plugin_ignore.py
##
@@ -0,0 +1,96 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import os
+import shutil
+import tempfile
+import unittest
+from unittest.mock import patch
+
+from airflow import settings  # type: ignore
+from airflow.utils.file import find_path_from_directory  # type: ignore
+
+
+class TestIgnorePluginFile(unittest.TestCase):
+"""
+Test that the .airflowignore work and whether the file is properly ignored.
+"""
+
+def setUp(self):
+"""
+Make tmp folder and files that should be ignored. And set base path.
+"""
+self.test_dir = tempfile.mkdtemp()
+self.test_file = os.path.join(self.test_dir, 'test_file.txt')
+self.plugin_folder_path = os.path.join(self.test_dir, 'test_ignore')
+os.mkdir(os.path.join(self.test_dir, "test_ignore"))
+with open(os.path.join(self.plugin_folder_path, "test_load.py"), "w") 
as file:
+file.write("#Should not be ignored file")
+with open(os.path.join(self.plugin_folder_path, ".airflowignore"), 
"w") as file:
+file.write("#ignore test\nnot\nsubdir2")
+os.mkdir(os.path.join(self.plugin_folder_path, "subdir1"))
+with open(os.path.join(self.plugin_folder_path, 
"subdir1/.airflowignore"), "w") as file:
+file.write("#ignore test\nnone")
+with open(os.path.join(self.plugin_folder_path, 
"subdir1/test_load_sub1.py"), "w") as file:
+file.write("#Should not be ignored file")
+with open(os.path.join(self.plugin_folder_path, 
"test_notload_sub.py"), 'w') as file:
+file.write('raise Exception("This file should have been 
ignored!")')
+with open(os.path.join(self.plugin_folder_path, 
"subdir1/test_noneload_sub1.py"), 'w') as file:
+file.write('raise Exception("This file should have been 
ignored!")')
+os.mkdir(os.path.join(self.plugin_folder_path, "subdir2"))
+with open(os.path.join(self.plugin_folder_path, 
"subdir2/test_shouldignore.py"), 'w') as file:
+file.write('raise Exception("This file should have been 
ignored!")')
+with open(os.path.join(self.plugin_folder_path, 
"subdir2/test_shouldignore.py"), 'w') as file:
+file.write('raise Exception("This file should have been 
ignored!")')

Review comment:
   Is the content of those files important? There's a lot of repeated code, so I 
would opt for a loop like:
   ```python
   for file_path, content in files_content:
       with open(file_path, "w") as f:
           f.write(content)
   ```
   Do you think it would make the code clearer? 





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #9531: Support .airflowignore for plugins

2020-07-03 Thread GitBox


turbaszek commented on a change in pull request #9531:
URL: https://github.com/apache/airflow/pull/9531#discussion_r449419305



##
File path: tests/plugins/test_ignore/subdir1/test_load_sub1.py
##
@@ -0,0 +1,35 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Import module"""
+from airflow.models.baseoperator import BaseOperator  # type: ignore
+from airflow.utils.decorators import apply_defaults  # type: ignore
+
+
+class Sub1TestLoadOperator(BaseOperator):
+"""
+Test load operator
+"""
+@apply_defaults
+def __init__(
+self,
+*args,
+**kwargs):
+super(Sub1TestLoadOperator, self).__init__(*args, **kwargs)
+
+def execute(self, context):
+pass

Review comment:
   What is the purpose of those files? I don't think I see them used in the tests.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #9531: Support .airflowignore for plugins

2020-07-03 Thread GitBox


turbaszek commented on a change in pull request #9531:
URL: https://github.com/apache/airflow/pull/9531#discussion_r449419305



##
File path: tests/plugins/test_ignore/subdir1/test_load_sub1.py
##
@@ -0,0 +1,35 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Import module"""
+from airflow.models.baseoperator import BaseOperator  # type: ignore
+from airflow.utils.decorators import apply_defaults  # type: ignore
+
+
+class Sub1TestLoadOperator(BaseOperator):
+"""
+Test load operator
+"""
+@apply_defaults
+def __init__(
+self,
+*args,
+**kwargs):
+super(Sub1TestLoadOperator, self).__init__(*args, **kwargs)
+
+def execute(self, context):
+pass

Review comment:
   What is the purpose of those files? I don't think I see them used in the tests.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #9531: Support .airflowignore for plugins

2020-07-03 Thread GitBox


turbaszek commented on a change in pull request #9531:
URL: https://github.com/apache/airflow/pull/9531#discussion_r449418389



##
File path: tests/plugins/test_plugin_ignore.py
##
@@ -0,0 +1,89 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import os
+import shutil
+import tempfile
+import unittest
+from unittest.mock import patch
+
+from airflow import settings  # type: ignore
+from airflow.utils.file import find_path_from_directory  # type: ignore
+
+
+class TestIgnorePluginFile(unittest.TestCase):
+"""
+Test that the .airflowignore work and whether the file is properly ignored.
+"""
+
+def setUp(self):
+"""
+Make tmp folder and files that should be ignored. And set base path.
+"""
+self.test_dir = tempfile.mkdtemp()
+self.test_file = os.path.join(self.test_dir, 'test_file.txt')
+self.plugin_folder_path = os.path.join(self.test_dir, 'test_ignore')
+shutil.copytree(os.path.join(settings.PLUGINS_FOLDER, 'test_ignore'), 
self.plugin_folder_path)
+file = open(os.path.join(self.plugin_folder_path, 
"test_notload_sub.py"), 'w')
+file.write('raise Exception("This file should have been ignored!")')
+file.close()
+file = open(os.path.join(self.plugin_folder_path, 
"subdir1/test_noneload_sub1.py"), 'w')
+file.write('raise Exception("This file should have been ignored!")')
+file.close()
+os.mkdir(os.path.join(self.plugin_folder_path, "subdir2"))
+file = open(os.path.join(self.plugin_folder_path, 
"subdir2/test_shouldignore.py"), 'w')
+file.write('raise Exception("This file should have been ignored!")')
+file.close()
+self.mock_plugins_folder = patch.object(
+settings, 'PLUGINS_FOLDER', return_value=self.plugin_folder_path
+)
+
+def tearDown(self):
+"""
+Delete tmp folder
+"""
+shutil.rmtree(self.test_dir)
+
+def test_find_not_should_ignore_path(self):
+"""
+Test that the .airflowignore work and whether the file is properly 
ignored.
+"""
+
+detected_files = set()
+should_ignore_files = {
+'test_notload.py',
+'test_notload_sub.py',
+'test_noneload_sub1.py',
+'test_shouldignore.py'
+}
+should_not_ignore_files = {
+'test_load.py',
+'test_load_sub1.py'
+}
+ignore_list_file = ".airflowignore"
+for file_path in find_path_from_directory(self.plugin_folder_path, 
ignore_list_file):
+if not os.path.isfile(file_path):
+continue
+_, file_ext = os.path.splitext(os.path.split(file_path)[-1])
+if file_ext != '.py':
+continue
+detected_files.add(os.path.basename(file_path))
+self.assertEqual(detected_files, should_not_ignore_files)
+for path in detected_files:
+self.assertNotIn(path, should_ignore_files)

Review comment:
   ```suggestion
   self.assertEqual(detected_files & should_ignore_files, set())
   ```
   WDYT?





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #9531: Support .airflowignore for plugins

2020-07-03 Thread GitBox


turbaszek commented on a change in pull request #9531:
URL: https://github.com/apache/airflow/pull/9531#discussion_r449417007



##
File path: tests/plugins/test_plugin_ignore.py
##
@@ -0,0 +1,89 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import os
+import shutil
+import tempfile
+import unittest
+from unittest.mock import patch
+
+from airflow import settings  # type: ignore
+from airflow.utils.file import find_path_from_directory  # type: ignore
+
+
+class TestIgnorePluginFile(unittest.TestCase):
+"""
+Test that the .airflowignore work and whether the file is properly ignored.
+"""
+
+def setUp(self):
+"""
+Make tmp folder and files that should be ignored. And set base path.
+"""
+self.test_dir = tempfile.mkdtemp()
+self.test_file = os.path.join(self.test_dir, 'test_file.txt')
+self.plugin_folder_path = os.path.join(self.test_dir, 'test_ignore')
+shutil.copytree(os.path.join(settings.PLUGINS_FOLDER, 'test_ignore'), 
self.plugin_folder_path)
+file = open(os.path.join(self.plugin_folder_path, 
"test_notload_sub.py"), 'w')
+file.write('raise Exception("This file should have been ignored!")')
+file.close()
+file = open(os.path.join(self.plugin_folder_path, 
"subdir1/test_noneload_sub1.py"), 'w')
+file.write('raise Exception("This file should have been ignored!")')
+file.close()
+os.mkdir(os.path.join(self.plugin_folder_path, "subdir2"))
+file = open(os.path.join(self.plugin_folder_path, 
"subdir2/test_shouldignore.py"), 'w')
+file.write('raise Exception("This file should have been ignored!")')
+file.close()

Review comment:
   Can you please use a context manager?
   ```python
   with open(...) as file:
       file.write(...)
   ```





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #9531: Support .airflowignore for plugins

2020-07-02 Thread GitBox


turbaszek commented on a change in pull request #9531:
URL: https://github.com/apache/airflow/pull/9531#discussion_r448905152



##
File path: airflow/utils/file.py
##
@@ -90,6 +90,47 @@ def open_maybe_zipped(fileloc, mode='r'):
 return io.open(fileloc, mode=mode)
 
 
+def find_path_from_directory(
+base_dir_path: str,
+ignore_list_file: str) -> Generator[str, None, None]:
+"""
+Search the file and return the path of the file that should not be ignored.
+:param base_dir_path: the base path to be searched for.
+:param ignore_file_list_name: the file name in which specifies a regular 
expression pattern is written.

Review comment:
   ```suggestion
   ignore_file_name: str) -> Generator[str, None, None]:
   """
   Search the file and return the path of the file that should not be 
ignored.
   :param base_dir_path: the base path to be searched for.
   :param ignore_file_name: the name of the file in which the regular 
expression patterns are written.
   ```
   WDYT?





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #9531: Support .airflowignore for plugins

2020-07-01 Thread GitBox


turbaszek commented on a change in pull request #9531:
URL: https://github.com/apache/airflow/pull/9531#discussion_r448366593



##
File path: airflow/utils/file.py
##
@@ -90,6 +90,48 @@ def open_maybe_zipped(fileloc, mode='r'):
 return io.open(fileloc, mode=mode)
 
 
+def find_path_from_directory(
+base_dir_path: str,
+ignore_list_file: str) -> Generator[str, None, None]:
+"""
+Search the file and return the path of the file that should not be ignored.
+:param base_dir_path: the base path to be searched for.
+:param ignore_file_list_name: the file name in which specifies a regular 
expression pattern is written.
+
+:return : file path not to be ignored
+"""
+
+patterns_by_dir: Dict[str, List[Pattern[str]]] = {}
+
+for root, dirs, files in os.walk(str(base_dir_path), followlinks=True):
+patterns: List[Pattern[str]] = patterns_by_dir.get(root, [])
+
+ignore_list_file_path = os.path.join(root, ignore_list_file)
+if os.path.isfile(ignore_list_file_path):
+with open(ignore_list_file_path, 'r') as file:
+lines_no_comments = [re.compile(r"\s*#.*").sub("", line) for 
line in file.read().split("\n")]
+patterns += [re.compile(line) for line in lines_no_comments if 
line]
+patterns = list(set(patterns))
+
+dirs[:] = [
+subdir
+for subdir in dirs
+if not any(p.search(
+os.path.join(os.path.relpath(root, str(base_dir_path)), 
subdir)) for p in patterns)
+]
+
+for subdir in dirs:
+patterns_by_dir[os.path.join(root, subdir)] = patterns.copy()

Review comment:
   ```suggestion
   
   patterns_by_dir  = {os.path.join(root, sd): patterns.copy() for sd 
in dirs}
   ```
   WDYT? Also, do we have to create a copy of `patterns` each time?

##
File path: airflow/plugins_manager.py
##
@@ -164,34 +165,34 @@ def load_plugins_from_plugin_directory():
 global plugins  # pylint: disable=global-statement
 log.debug("Loading plugins from directory: %s", settings.PLUGINS_FOLDER)
 
-# Crawl through the plugins folder to find AirflowPlugin derivatives
-for root, _, files in os.walk(settings.PLUGINS_FOLDER, followlinks=True):  
# noqa # pylint: disable=too-many-nested-blocks
-for f in files:
-filepath = os.path.join(root, f)
-try:
-if not os.path.isfile(filepath):
-continue
-mod_name, file_ext = os.path.splitext(
-os.path.split(filepath)[-1])
-if file_ext != '.py':
-continue
-
-log.debug('Importing plugin module %s', filepath)
-
-loader = importlib.machinery.SourceFileLoader(mod_name, 
filepath)
-spec = importlib.util.spec_from_loader(mod_name, loader)
-mod = importlib.util.module_from_spec(spec)
-sys.modules[spec.name] = mod
-loader.exec_module(mod)
-for mod_attr_value in list(mod.__dict__.values()):
-if is_valid_plugin(mod_attr_value):
-plugin_instance = mod_attr_value()
-plugins.append(plugin_instance)
-except Exception as e:  # pylint: disable=broad-except
-log.exception(e)
-path = filepath or str(f)
-log.error('Failed to import plugin %s', path)
-import_errors[path] = str(e)
+ignore_list_file = ".airflowignore"

Review comment:
   Should we make it a constant?

##
File path: airflow/utils/file.py
##
@@ -90,6 +90,48 @@ def open_maybe_zipped(fileloc, mode='r'):
 return io.open(fileloc, mode=mode)
 
 
+def find_path_from_directory(
+base_dir_path: str,
+ignore_list_file: str) -> Generator[str, None, None]:
+"""
+Search the file and return the path of the file that should not be ignored.
+:param base_dir_path: the base path to be searched for.
+:param ignore_file_list_name: the file name in which specifies a regular 
expression pattern is written.
+
+:return : file path not to be ignored
+"""
+
+patterns_by_dir: Dict[str, List[Pattern[str]]] = {}
+
+for root, dirs, files in os.walk(str(base_dir_path), followlinks=True):
+patterns: List[Pattern[str]] = patterns_by_dir.get(root, [])
+
+ignore_list_file_path = os.path.join(root, ignore_list_file)
+if os.path.isfile(ignore_list_file_path):
+with open(ignore_list_file_path, 'r') as file:
+lines_no_comments = [re.compile(r"\s*#.*").sub("", line) for 
line in file.read().split("\n")]
+patterns += [re.compile(line) for line in lines_no_comments if 
line]
+patterns = list(set(patterns))
+
+dirs[:] = [
+subdir
+for subdir in dirs
+