BentsiLeviav commented on code in PR #67080: URL: https://github.com/apache/airflow/pull/67080#discussion_r3275617470
########## providers/clickhouse/tests/unit/clickhouse/hooks/test_clickhouse.py: ########## @@ -0,0 +1,1344 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import json +import re +from unittest.mock import ANY, MagicMock, patch + +import pytest + +from airflow.models import Connection +from airflow.providers.clickhouse.hooks.clickhouse import ( + ClickHouseConnection, + ClickHouseHook, +) + +# --------------------------------------------------------------------------- +# Fixtures / shared connection definitions +# --------------------------------------------------------------------------- + +BASE_CONN = Connection( + conn_id="clickhouse_test", + conn_type="clickhouse", + host="clickhouse-host", + port=8123, + login="user", + password="secret", + schema="analytics", +) + +BASE_CONN_EXTRA = Connection( + conn_id="clickhouse_test_extra", + conn_type="clickhouse", + host="secure-host", + port=8443, + login="admin", + password="topsecret", + schema="prod", + extra=json.dumps( + { + "secure": True, + "verify": False, + "connect_timeout": 30, + "send_receive_timeout": 600, + "compress": False, + "client_name": "my-airflow", + } + ), +) + +MINIMAL_CONN = Connection( + conn_id="clickhouse_minimal", + conn_type="clickhouse", +) + +CONN_WITH_CLIENT_KWARGS = Connection( + conn_id="clickhouse_client_kwargs", + conn_type="clickhouse", + host="ch-host", + port=8123, + login="user", + password="pass", + schema="db", + extra=json.dumps( + { + "client_kwargs": { + "http_proxy": "http://proxy:8080", + "pool_mgr_params": {"num_pools": 4}, + } + } + ), +) + +CONN_WITH_CLIENT_KWARGS_STR = Connection( + conn_id="clickhouse_client_kwargs_str", + conn_type="clickhouse", + host="ch-host", + port=8123, + login="user", + password="pass", + schema="db", + extra=json.dumps( + { + # client_kwargs stored as a JSON string (as the UI text field stores it) + "client_kwargs": '{"http_proxy": "http://proxy:8080"}' + } + ), +) + +CONN_WITH_SESSION_SETTINGS = Connection( + conn_id="clickhouse_session", + conn_type="clickhouse", + host="ch-host", + port=8123, + login="user", + password="pass", + schema="db", + extra=json.dumps( + { + "session_settings": { + "max_execution_time": 120, + "max_threads": 4, + } + } + ), +) + +CONN_WITH_SESSION_SETTINGS_STR = Connection( + conn_id="clickhouse_session_str", + conn_type="clickhouse", + host="ch-host", + port=8123, + login="user", + password="pass", + schema="db", + extra=json.dumps( + { + # session_settings stored as a JSON string (as the UI text field stores it) + "session_settings": '{"max_execution_time": 60}' + } + ), +) + + +# --------------------------------------------------------------------------- +# Tests: get_conn +# --------------------------------------------------------------------------- + + +class TestClickHouseHookGetConn: + @patch("airflow.providers.clickhouse.hooks.clickhouse.ClickHouseHook.get_connection") + @patch("clickhouse_connect.get_client") + def test_get_conn_basic(self, mock_get_client, mock_get_connection): + mock_get_connection.return_value = BASE_CONN + hook = ClickHouseHook(clickhouse_conn_id="clickhouse_test") + conn = hook.get_conn() + + mock_get_client.assert_called_once_with( + host="clickhouse-host", + port=8123, + username="user", + password="secret", + database="analytics", + secure=False, + verify=True, + client_name=ANY, # always set; format verified in TestClickHouseHookClientName + ) + assert isinstance(conn, ClickHouseConnection) + + @patch("airflow.providers.clickhouse.hooks.clickhouse.ClickHouseHook.get_connection") + @patch("clickhouse_connect.get_client") + def test_get_conn_with_extra_params(self, mock_get_client, mock_get_connection): + mock_get_connection.return_value = BASE_CONN_EXTRA + hook = ClickHouseHook(clickhouse_conn_id="clickhouse_test_extra") + hook.get_conn() + + mock_get_client.assert_called_once_with( + host="secure-host", + port=8443, + username="admin", + password="topsecret", + database="prod", + secure=True, + verify=False, + connect_timeout=30, + send_receive_timeout=600, + compress=False, + client_name=ANY, # contains Airflow version + "(my-airflow)" comment + ) + + @patch("airflow.providers.clickhouse.hooks.clickhouse.ClickHouseHook.get_connection") + @patch("clickhouse_connect.get_client") + def test_get_conn_minimal_defaults(self, mock_get_client, mock_get_connection): + """All connection fields absent → sensible defaults applied.""" + mock_get_connection.return_value = MINIMAL_CONN + hook = ClickHouseHook(clickhouse_conn_id="clickhouse_minimal") + hook.get_conn() + + mock_get_client.assert_called_once_with( + host="localhost", + port=8123, + username="default", + password="", + database="default", + secure=False, + verify=True, + client_name=ANY, # always set; format verified in TestClickHouseHookClientName + ) + + @patch("airflow.providers.clickhouse.hooks.clickhouse.ClickHouseHook.get_connection") + @patch("clickhouse_connect.get_client") + def test_get_conn_database_overrides_connection_schema(self, mock_get_client, mock_get_connection): + """database constructor arg must override the schema field from the connection.""" + mock_get_connection.return_value = BASE_CONN # schema="analytics" + hook = ClickHouseHook(clickhouse_conn_id="clickhouse_test", database="overridden_db") + hook.get_conn() + + call_kwargs = mock_get_client.call_args.kwargs + assert call_kwargs["database"] == "overridden_db" + + @patch("airflow.providers.clickhouse.hooks.clickhouse.ClickHouseHook.get_connection") + @patch("clickhouse_connect.get_client") + def test_get_conn_no_database_falls_back_to_connection_schema(self, mock_get_client, mock_get_connection): + """Without a database constructor arg the connection schema is used.""" + mock_get_connection.return_value = BASE_CONN # schema="analytics" + hook = ClickHouseHook(clickhouse_conn_id="clickhouse_test") + hook.get_conn() + + call_kwargs = mock_get_client.call_args.kwargs + assert call_kwargs["database"] == "analytics" + + +# --------------------------------------------------------------------------- +# Tests: get_uri +# --------------------------------------------------------------------------- + + +class TestClickHouseHookGetUri: + @patch("airflow.providers.clickhouse.hooks.clickhouse.ClickHouseHook.get_connection") + def test_get_uri_with_password(self, mock_get_connection): + mock_get_connection.return_value = BASE_CONN + hook = ClickHouseHook(clickhouse_conn_id="clickhouse_test") + uri = hook.get_uri() + + assert uri == "clickhousedb://user:secret@clickhouse-host:8123/analytics" + + @patch("airflow.providers.clickhouse.hooks.clickhouse.ClickHouseHook.get_connection") + def test_get_uri_without_password(self, mock_get_connection): + conn = Connection( + conn_id="clickhouse_nopass", + conn_type="clickhouse", + host="myhost", + port=8123, + login="reader", + schema="logs", + ) + mock_get_connection.return_value = conn + hook = ClickHouseHook(clickhouse_conn_id="clickhouse_nopass") + uri = hook.get_uri() + + assert uri == "clickhousedb://reader@myhost:8123/logs" + + @patch("airflow.providers.clickhouse.hooks.clickhouse.ClickHouseHook.get_connection") + def test_get_uri_password_url_encoded(self, mock_get_connection): + """Special characters in the password must be percent-encoded.""" + conn = Connection( + conn_id="clickhouse_special", + conn_type="clickhouse", + host="host", + port=8123, + login="user", + password="p@ss/w0rd!", + schema="db", + ) + mock_get_connection.return_value = conn + hook = ClickHouseHook(clickhouse_conn_id="clickhouse_special") + uri = hook.get_uri() + + assert "@" not in uri.split("://")[1].split("@")[0] # password part is encoded + assert "p%40ss" in uri or "p%40" in uri # @ is encoded + + @patch("airflow.providers.clickhouse.hooks.clickhouse.ClickHouseHook.get_connection") + def test_get_uri_defaults(self, mock_get_connection): + mock_get_connection.return_value = MINIMAL_CONN + hook = ClickHouseHook(clickhouse_conn_id="clickhouse_minimal") + uri = hook.get_uri() + + assert uri == "clickhousedb://default@localhost:8123/default" + + @patch("airflow.providers.clickhouse.hooks.clickhouse.ClickHouseHook.get_connection") + def test_get_uri_database_overrides_connection_schema(self, mock_get_connection): + """database constructor arg must appear in the URI instead of the connection schema.""" + mock_get_connection.return_value = BASE_CONN # schema="analytics" + hook = ClickHouseHook(clickhouse_conn_id="clickhouse_test", database="overridden_db") + uri = hook.get_uri() + + assert uri.endswith("/overridden_db") + assert "analytics" not in uri + + @patch("airflow.providers.clickhouse.hooks.clickhouse.ClickHouseHook.get_connection") + def test_get_uri_secure_uses_clickhousedbs_scheme(self, mock_get_connection): + """secure=True in extra must produce the clickhousedbs:// (HTTPS) scheme.""" + conn = Connection( + conn_id="ch_secure", + conn_type="clickhouse", + host="secure-host", + port=8443, + login="user", + password="pass", + schema="db", + extra=json.dumps({"secure": True}), + ) + mock_get_connection.return_value = conn + uri = ClickHouseHook(clickhouse_conn_id="ch_secure").get_uri() + assert uri == "clickhousedbs://user:pass@secure-host:8443/db" Review Comment: I renamed the test to `test_get_uri_secure_adds_query_param` and updated the assertion to assert `uri == "clickhousedb://user:pass@secure-host:8443/db?secure=true"` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
