This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 2f2796f675 Fix `GCSToGCSOperator` behavior difference for moving single object (#40162)
2f2796f675 is described below

commit 2f2796f675e238e0b266b8d6e4fdfe5c0b7d1bf3
Author: Bora Berke Sahin <67373739+borabe...@users.noreply.github.com>
AuthorDate: Fri Jun 21 13:08:16 2024 +0300

    Fix `GCSToGCSOperator` behavior difference for moving single object (#40162)
    
    * Merge different behavior of `GCSToGCSOperator` for single and multiple objects
    
    * Add behavior change note to changelog
---
 airflow/providers/google/CHANGELOG.rst             | 11 ++++++
 .../providers/google/cloud/transfers/gcs_to_gcs.py | 18 +++------
 .../google/cloud/transfers/test_gcs_to_gcs.py      | 46 ++++++++++++++++++++++
 3 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/airflow/providers/google/CHANGELOG.rst 
b/airflow/providers/google/CHANGELOG.rst
index e14420a193..edaef2fb98 100644
--- a/airflow/providers/google/CHANGELOG.rst
+++ b/airflow/providers/google/CHANGELOG.rst
@@ -27,6 +27,17 @@
 Changelog
 ---------
 
+.. note::
+  The ``GCSToGCSOperator`` now retains the nested folder structure when moving or copying a single
+  object, aligning its behavior with the behavior for multiple objects. If this change impacts your
+  workflows, you may need to adjust your ``source_object`` parameter to include the full path up to
+  the folder containing your single file and specify ``destination_object`` explicitly to ignore
+  nested folders. For example, if you previously used ``source_object='folder/nested_folder/'`` to
+  move file ``'folder/nested_folder/second_nested_folder/file'``, you should now use
+  ``source_object='folder/nested_folder/second_nested_folder/'`` and specify
+  ``destination_object='folder/nested_folder/'``. This moves the file to ``'folder/nested_folder/file'``,
+  whereas leaving ``source_object`` unchanged would now move it to ``'folder/nested_folder/second_nested_folder/file'``.
+
 10.19.0
 .......
 
diff --git a/airflow/providers/google/cloud/transfers/gcs_to_gcs.py 
b/airflow/providers/google/cloud/transfers/gcs_to_gcs.py
index 0b3d330b65..7fb12dbf09 100644
--- a/airflow/providers/google/cloud/transfers/gcs_to_gcs.py
+++ b/airflow/providers/google/cloud/transfers/gcs_to_gcs.py
@@ -408,20 +408,9 @@ class GCSToGCSOperator(BaseOperator):
                 msg = f"{prefix} does not exist in bucket {self.source_bucket}"
                 self.log.warning(msg)
                 raise AirflowException(msg)
-        if len(objects) == 1 and objects[0][-1] != "/":
-            self._copy_file(hook=hook, source_object=objects[0])
         elif len(objects):
             self._copy_multiple_objects(hook=hook, source_objects=objects, 
prefix=prefix)
 
-    def _copy_file(self, hook, source_object):
-        destination_object = self.destination_object or source_object
-        if self.destination_object and self.destination_object[-1] == "/":
-            file_name = source_object.split("/")[-1]
-            destination_object += file_name
-        self._copy_single_object(
-            hook=hook, source_object=source_object, 
destination_object=destination_object
-        )
-
     def _copy_multiple_objects(self, hook, source_objects, prefix):
         # Check whether the prefix is a root directory for all the rest of 
objects.
         _pref = prefix.rstrip("/")
@@ -441,7 +430,12 @@ class GCSToGCSOperator(BaseOperator):
                 destination_object = source_obj
             else:
                 file_name_postfix = source_obj.replace(base_path, "", 1)
-                destination_object = self.destination_object.rstrip("/") + "/" 
+ file_name_postfix
+
+                destination_object = (
+                    self.destination_object.rstrip("/")[0 : 
self.destination_object.rfind("/")]
+                    + "/"
+                    + file_name_postfix
+                )
 
             self._copy_single_object(
                 hook=hook, source_object=source_obj, 
destination_object=destination_object
diff --git a/tests/providers/google/cloud/transfers/test_gcs_to_gcs.py 
b/tests/providers/google/cloud/transfers/test_gcs_to_gcs.py
index 97e16e7366..4f63326af7 100644
--- a/tests/providers/google/cloud/transfers/test_gcs_to_gcs.py
+++ b/tests/providers/google/cloud/transfers/test_gcs_to_gcs.py
@@ -742,6 +742,52 @@ class TestGoogleCloudStorageToCloudStorageOperator:
                 ["source/foo.txt"],
                 ["{prefix}/foo.txt"],
             ),
+            (
+                ["source/sub1/sub2/sub3/file.txt"],
+                "source/",
+                None,
+                False,
+                ["source/sub1/sub2/sub3/file.txt"],
+                ["{prefix}/sub1/sub2/sub3/file.txt"],
+            ),
+            (
+                ["source/sub1/sub2/sub3/file.txt", 
"source/sub1/sub2/sub3/file2.txt"],
+                "source/",
+                None,
+                False,
+                ["source/sub1/sub2/sub3/file.txt", 
"source/sub1/sub2/sub3/file2.txt"],
+                ["{prefix}/sub1/sub2/sub3/file.txt", 
"{prefix}/sub1/sub2/sub3/file2.txt"],
+            ),
+            (
+                [f"{DESTINATION_OBJECT_PREFIX}/sub1/sub2/sub3/file.txt"],
+                f"{DESTINATION_OBJECT_PREFIX}",
+                None,
+                False,
+                [f"{DESTINATION_OBJECT_PREFIX}/sub1/sub2/sub3/file.txt"],
+                ["{prefix}/sub1/sub2/sub3/file.txt"],
+            ),
+            (
+                [f"{DESTINATION_OBJECT_PREFIX}/sub1/sub2/sub3/file.txt"],
+                f"{DESTINATION_OBJECT_PREFIX}/",
+                None,
+                False,
+                [f"{DESTINATION_OBJECT_PREFIX}/sub1/sub2/sub3/file.txt"],
+                ["{prefix}/sub1/sub2/sub3/file.txt"],
+            ),
+            (
+                [
+                    f"{DESTINATION_OBJECT_PREFIX}/sub1/sub2/sub3/file.txt",
+                    f"{DESTINATION_OBJECT_PREFIX}/sub1/sub2/sub3/file2.txt",
+                ],
+                f"{DESTINATION_OBJECT_PREFIX}/",
+                None,
+                False,
+                [
+                    f"{DESTINATION_OBJECT_PREFIX}/sub1/sub2/sub3/file.txt",
+                    f"{DESTINATION_OBJECT_PREFIX}/sub1/sub2/sub3/file2.txt",
+                ],
+                ["{prefix}/sub1/sub2/sub3/file.txt", 
"{prefix}/sub1/sub2/sub3/file2.txt"],
+            ),
             (
                 ["source/foo.txt", "source/foo.txt.abc", 
"source/foo.txt/subfolder/file.txt"],
                 "source/foo.txt",

Reply via email to