dirrao commented on code in PR #37237: URL: https://github.com/apache/airflow/pull/37237#discussion_r1482341323
########## airflow/providers/google/cloud/hooks/gcs.py: ########## @@ -1295,37 +1300,45 @@ def _prepare_sync_plan( destination_object: str | None, recursive: bool, ) -> tuple[set[storage.Blob], set[storage.Blob], set[storage.Blob]]: - # Calculate the number of characters that remove from the name, because they contain information + # Calculate the number of characters that are removed from the name, because they contain information # about the parent's path source_object_prefix_len = len(source_object) if source_object else 0 destination_object_prefix_len = len(destination_object) if destination_object else 0 delimiter = "/" if not recursive else None + # Fetch blobs list source_blobs = list(source_bucket.list_blobs(prefix=source_object, delimiter=delimiter)) destination_blobs = list( destination_bucket.list_blobs(prefix=destination_object, delimiter=delimiter) ) + # Create indexes that allow you to identify blobs based on their name source_names_index = {a.name[source_object_prefix_len:]: a for a in source_blobs} destination_names_index = {a.name[destination_object_prefix_len:]: a for a in destination_blobs} + # Create sets with names without parent object name source_names = set(source_names_index.keys()) + # Discards empty string that creates an empty source subdirectory Review Comment: The comment is not clear. Does GCS allow directory names or filenames that are empty strings? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org