rahul-madaan commented on code in PR #47168:
URL: https://github.com/apache/airflow/pull/47168#discussion_r1978841713
##########
providers/snowflake/tests/unit/snowflake/transfers/test_copy_into_snowflake.py:
##########
@@ -229,3 +229,155 @@ def test_get_openlineage_facets_on_complete_unsupported_azure_uri(self, mock_hoo
run_facets=expected_run_facets,
job_facets={"sql": SQLJobFacet(query=expected_sql)},
)
+
+ @mock.patch("airflow.providers.snowflake.transfers.copy_into_snowflake.SnowflakeHook")
+ def test_get_openlineage_facets_on_complete_with_multiple_files_in_same_directory(self, mock_hook):
+ # Test that files in the same directory are tracked individually
+ mock_hook().run.return_value = [
+ {"file": "s3://bucket/same_dir/file1.csv"},
+ {"file": "s3://bucket/same_dir/file2.csv"},
+ {"file": "s3://bucket/same_dir/file3.csv"},
+ {"file": "gcs://bucket/another_dir/file1.csv"},
+ {"file": "gcs://bucket/another_dir/file2.csv"},
+ ]
+ mock_hook().get_openlineage_database_info.return_value = DatabaseInfo(
+ scheme="snowflake_scheme", authority="authority", database="actual_database"
+ )
+ mock_hook().get_openlineage_default_schema.return_value = "actual_schema"
+ mock_hook().query_ids = ["query_id_123"]
+
+ expected_inputs = [
+ Dataset(namespace="gcs://bucket", name="another_dir/file1.csv"),
+ Dataset(namespace="gcs://bucket", name="another_dir/file2.csv"),
+ Dataset(namespace="s3://bucket", name="same_dir/file1.csv"),
+ Dataset(namespace="s3://bucket", name="same_dir/file2.csv"),
+ Dataset(namespace="s3://bucket", name="same_dir/file3.csv"),
+ ]
+ expected_outputs = [
+ Dataset(namespace="snowflake_scheme://authority", name="actual_database.actual_schema.table")
+ ]
+ expected_sql = """COPY INTO schema.table\n FROM @stage/\n FILE_FORMAT=CSV"""
+
+ op = CopyFromExternalStageToSnowflakeOperator(
+ task_id="test",
+ table="table",
+ stage="stage",
+ database="",
+ schema="schema",
+ file_format="CSV",
+ )
+ op.execute(None)
+ result = op.get_openlineage_facets_on_complete(None)
+ assert result == OperatorLineage(
+ inputs=expected_inputs,
+ outputs=expected_outputs,
+ run_facets={
+ "externalQuery": ExternalQueryRunFacet(
+ externalQueryId="query_id_123", source="snowflake_scheme://authority"
+ )
+ },
+ job_facets={"sql": SQLJobFacet(query=expected_sql)},
+ )
+
+ @mock.patch("airflow.providers.snowflake.transfers.copy_into_snowflake.SnowflakeHook")
+ def test_extract_openlineage_unique_dataset_paths_directly(self, mock_hook):
Review Comment:
done!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]