This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push: new 2f2796f675 Fix `GCSToGCSOperator` behavior difference for moving single object (#40162) 2f2796f675 is described below commit 2f2796f675e238e0b266b8d6e4fdfe5c0b7d1bf3 Author: Bora Berke Sahin <67373739+borabe...@users.noreply.github.com> AuthorDate: Fri Jun 21 13:08:16 2024 +0300 Fix `GCSToGCSOperator` behavior difference for moving single object (#40162) * Merge different behavior of `GCSToGCSOperator` for single and multiple objects * Add behavior change note to changelog --- airflow/providers/google/CHANGELOG.rst | 11 ++++++ .../providers/google/cloud/transfers/gcs_to_gcs.py | 18 +++------ .../google/cloud/transfers/test_gcs_to_gcs.py | 46 ++++++++++++++++++++++ 3 files changed, 63 insertions(+), 12 deletions(-) diff --git a/airflow/providers/google/CHANGELOG.rst b/airflow/providers/google/CHANGELOG.rst index e14420a193..edaef2fb98 100644 --- a/airflow/providers/google/CHANGELOG.rst +++ b/airflow/providers/google/CHANGELOG.rst @@ -27,6 +27,17 @@ Changelog --------- +.. note:: + The ``GCSToGCSOperator`` now retains the nested folder structure when moving or copying a single + object, aligning its behavior with the behavior for multiple objects. If this change impacts your + workflows, you may need to adjust your ``source_object`` parameter to include the full path up to + the folder containing your single file and specify ``destination_object`` explicitly to ignore + nested folders. For example, if you previously used ``source_object='folder/nested_folder/'`` to + move the file ``'folder/nested_folder/second_nested_folder/file'``, you should now use + ``source_object='folder/nested_folder/second_nested_folder/'`` and specify + ``destination_object='folder/nested_folder/'``. This moves the file to ``'folder/nested_folder/file'`` + rather than to ``'folder/nested_folder/second_nested_folder/file'``, where the new structure-preserving behavior would otherwise place it. + 10.19.0 ....... 
diff --git a/airflow/providers/google/cloud/transfers/gcs_to_gcs.py b/airflow/providers/google/cloud/transfers/gcs_to_gcs.py index 0b3d330b65..7fb12dbf09 100644 --- a/airflow/providers/google/cloud/transfers/gcs_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/gcs_to_gcs.py @@ -408,20 +408,9 @@ class GCSToGCSOperator(BaseOperator): msg = f"{prefix} does not exist in bucket {self.source_bucket}" self.log.warning(msg) raise AirflowException(msg) - if len(objects) == 1 and objects[0][-1] != "/": - self._copy_file(hook=hook, source_object=objects[0]) elif len(objects): self._copy_multiple_objects(hook=hook, source_objects=objects, prefix=prefix) - def _copy_file(self, hook, source_object): - destination_object = self.destination_object or source_object - if self.destination_object and self.destination_object[-1] == "/": - file_name = source_object.split("/")[-1] - destination_object += file_name - self._copy_single_object( - hook=hook, source_object=source_object, destination_object=destination_object - ) - def _copy_multiple_objects(self, hook, source_objects, prefix): # Check whether the prefix is a root directory for all the rest of objects. 
_pref = prefix.rstrip("/") @@ -441,7 +430,12 @@ class GCSToGCSOperator(BaseOperator): destination_object = source_obj else: file_name_postfix = source_obj.replace(base_path, "", 1) - destination_object = self.destination_object.rstrip("/") + "/" + file_name_postfix + + destination_object = ( + self.destination_object.rstrip("/")[0 : self.destination_object.rfind("/")] + + "/" + + file_name_postfix + ) self._copy_single_object( hook=hook, source_object=source_obj, destination_object=destination_object diff --git a/tests/providers/google/cloud/transfers/test_gcs_to_gcs.py b/tests/providers/google/cloud/transfers/test_gcs_to_gcs.py index 97e16e7366..4f63326af7 100644 --- a/tests/providers/google/cloud/transfers/test_gcs_to_gcs.py +++ b/tests/providers/google/cloud/transfers/test_gcs_to_gcs.py @@ -742,6 +742,52 @@ class TestGoogleCloudStorageToCloudStorageOperator: ["source/foo.txt"], ["{prefix}/foo.txt"], ), + ( + ["source/sub1/sub2/sub3/file.txt"], + "source/", + None, + False, + ["source/sub1/sub2/sub3/file.txt"], + ["{prefix}/sub1/sub2/sub3/file.txt"], + ), + ( + ["source/sub1/sub2/sub3/file.txt", "source/sub1/sub2/sub3/file2.txt"], + "source/", + None, + False, + ["source/sub1/sub2/sub3/file.txt", "source/sub1/sub2/sub3/file2.txt"], + ["{prefix}/sub1/sub2/sub3/file.txt", "{prefix}/sub1/sub2/sub3/file2.txt"], + ), + ( + [f"{DESTINATION_OBJECT_PREFIX}/sub1/sub2/sub3/file.txt"], + f"{DESTINATION_OBJECT_PREFIX}", + None, + False, + [f"{DESTINATION_OBJECT_PREFIX}/sub1/sub2/sub3/file.txt"], + ["{prefix}/sub1/sub2/sub3/file.txt"], + ), + ( + [f"{DESTINATION_OBJECT_PREFIX}/sub1/sub2/sub3/file.txt"], + f"{DESTINATION_OBJECT_PREFIX}/", + None, + False, + [f"{DESTINATION_OBJECT_PREFIX}/sub1/sub2/sub3/file.txt"], + ["{prefix}/sub1/sub2/sub3/file.txt"], + ), + ( + [ + f"{DESTINATION_OBJECT_PREFIX}/sub1/sub2/sub3/file.txt", + f"{DESTINATION_OBJECT_PREFIX}/sub1/sub2/sub3/file2.txt", + ], + f"{DESTINATION_OBJECT_PREFIX}/", + None, + False, + [ + 
f"{DESTINATION_OBJECT_PREFIX}/sub1/sub2/sub3/file.txt", + f"{DESTINATION_OBJECT_PREFIX}/sub1/sub2/sub3/file2.txt", + ], + ["{prefix}/sub1/sub2/sub3/file.txt", "{prefix}/sub1/sub2/sub3/file2.txt"], + ), ( ["source/foo.txt", "source/foo.txt.abc", "source/foo.txt/subfolder/file.txt"], "source/foo.txt",