This is an automated email from the ASF dual-hosted git repository. elizabeth pushed a commit to branch elizabeth/csv-tests in repository https://gitbox.apache.org/repos/asf/superset.git
commit c48d1c9ab1d560460482d866c65a1393f38c4181 Author: Elizabeth Thompson <[email protected]> AuthorDate: Thu Mar 13 17:09:27 2025 -0700 add more csv tests --- tests/unit_tests/charts/test_client_processing.py | 91 ++++++++++ tests/unit_tests/utils/csv_tests.py | 208 +++++++++++++++++++++- 2 files changed, 293 insertions(+), 6 deletions(-) diff --git a/tests/unit_tests/charts/test_client_processing.py b/tests/unit_tests/charts/test_client_processing.py index b38513a2e3..cecc43c45b 100644 --- a/tests/unit_tests/charts/test_client_processing.py +++ b/tests/unit_tests/charts/test_client_processing.py @@ -2060,6 +2060,97 @@ COUNT(is_software_dev) } +def test_apply_client_processing_csv_format_simple_table(): + """ + It should be able to process csv results + And not show a default column + """ + + result = { + "queries": [ + { + "result_format": ChartDataResultFormat.CSV, + "data": """ +COUNT(is_software_dev) +4725 +""", + } + ] + } + form_data = { + "datasource": "19__table", + "viz_type": "table", + "slice_id": 69, + "url_params": {}, + "granularity_sqla": "time_start", + "time_grain_sqla": "P1D", + "time_range": "No filter", + "groupbyColumns": [], + "groupbyRows": [], + "metrics": [ + { + "aggregate": "COUNT", + "column": { + "column_name": "is_software_dev", + "description": None, + "expression": None, + "filterable": True, + "groupby": True, + "id": 1463, + "is_dttm": False, + "python_date_format": None, + "type": "DOUBLE PRECISION", + "verbose_name": None, + }, + "expressionType": "SIMPLE", + "hasCustomLabel": False, + "isNew": False, + "label": "COUNT(is_software_dev)", + "optionName": "metric_9i1kctig9yr_sizo6ihd2o", + "sqlExpression": None, + } + ], + "metricsLayout": "COLUMNS", + "adhoc_filters": [ + { + "clause": "WHERE", + "comparator": "Currently A Developer", + "expressionType": "SIMPLE", + "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk", + "isExtra": False, + "isNew": False, + "operator": "==", + "sqlExpression": None, + "subject": "developer_type", + } + ], + "row_limit": 10000, + "order_desc": True, + "aggregateFunction": "Sum", + "valueFormat": "SMART_NUMBER", + "date_format": "smart_date", + "rowOrder": "key_a_to_z", + "colOrder": "key_a_to_z", + "extra_form_data": {}, + "force": False, + "result_format": "json", + "result_type": "results", + } + + assert apply_client_processing(result, form_data) == { + "queries": [ + { + "result_format": ChartDataResultFormat.CSV, + "data": "COUNT(is_software_dev)\n4725\n", + "colnames": ["COUNT(is_software_dev)"], + "indexnames": [0], + "coltypes": [GenericDataType.NUMERIC], + "rowcount": 1, + } + ] + } + + def test_apply_client_processing_csv_format_empty_string(): """ It should be able to process csv results with no data diff --git a/tests/unit_tests/utils/csv_tests.py b/tests/unit_tests/utils/csv_tests.py index fa2c79eb88..1ba03d2029 100644 --- a/tests/unit_tests/utils/csv_tests.py +++ b/tests/unit_tests/utils/csv_tests.py @@ -15,19 +15,27 @@ # specific language governing permissions and limitations # under the License. +import json + import pandas as pd import pyarrow as pa import pytest # noqa: F401 +from pandas.api.types import is_datetime64_any_dtype from superset.utils import csv +from superset.utils.core import GenericDataType +from superset.utils.csv import ( + df_to_escaped_csv, + get_chart_dataframe, +) def test_escape_value(): result = csv.escape_value("value") assert result == "value" - result = csv.escape_value("-10") - assert result == "-10" + result = csv.escape_value("10") + assert result == "10" result = csv.escape_value("@value") assert result == "'@value" @@ -35,8 +43,8 @@ def test_escape_value(): result = csv.escape_value("+value") assert result == "'+value" - result = csv.escape_value("-value") - assert result == "'-value" + result = csv.escape_value("value") + assert result == "'value" result = csv.escape_value("=value") assert result == "'=value" @@ -57,6 +65,84 @@ def test_escape_value(): assert result == "' =10+2" +def fake_get_chart_csv_data_none(chart_url, auth_cookies=None): + return None + + +def fake_get_chart_csv_data_empty(chart_url, auth_cookies=None): + # Return JSON with empty data so that the resulting DataFrame is empty + fake_result = { + "result": [{"data": {}, "coltypes": [], "colnames": [], "indexnames": []}] + } + return json.dumps(fake_result).encode("utf-8") + + +def fake_get_chart_csv_data_valid(chart_url, auth_cookies=None): + # Return JSON with non-temporal data and valid indexnames so that they are used. + fake_result = { + "result": [ + { + "data": {"col1": [1, 2], "col2": ["a", "b"]}, + "coltypes": [GenericDataType.NUMERIC, GenericDataType.STRING], + "colnames": ["col1", "col2"], + # Provide two index names so that a MultiIndex is built. + "indexnames": ["idx1", "idx2"], + } + ] + } + return json.dumps(fake_result).encode("utf-8") + + +def fake_get_chart_csv_data_temporal(chart_url, auth_cookies=None): + """ + Return JSON with a temporal column and valid indexnames + so that a MultiIndex is built. + """ + fake_result = { + "result": [ + { + "data": {"date": [1609459200000, 1612137600000], "val": [10, 20]}, + "coltypes": [GenericDataType.TEMPORAL, GenericDataType.NUMERIC], + "colnames": ["date", "val"], + # Provide two index names so a MultiIndex is built. + "indexnames": [0, 1], + } + ] + } + return json.dumps(fake_result).encode("utf-8") + + +def fake_get_chart_csv_data_hierarchical(chart_url, auth_cookies=None): + # Return JSON with hierarchical column (list-based) and matching index names. + fake_result = { + "result": [ + { + "data": {"a": [1, 2]}, + "coltypes": [GenericDataType.NUMERIC], + "colnames": [["level1", "a"]], + # Provide two index tuples for two rows + "indexnames": [["idx"], ["idx"]], + } + ] + } + return json.dumps(fake_result).encode("utf-8") + + +def fake_get_chart_csv_data_default(chart_url, auth_cookies=None): + # When indexnames is empty, pandas should fall back to a default RangeIndex + fake_result = { + "result": [ + { + "data": {"col1": [1, 2], "col2": ["a", "b"]}, + "coltypes": [GenericDataType.NUMERIC, GenericDataType.STRING], + "colnames": ["col1", "col2"], + "indexnames": None, + } + ] + } + return json.dumps(fake_result).encode("utf-8") + + def test_df_to_escaped_csv(): df = pd.DataFrame( data={ @@ -73,7 +159,7 @@ def test_df_to_escaped_csv(): } ) - escaped_csv_str = csv.df_to_escaped_csv( + escaped_csv_str = df_to_escaped_csv( df, encoding="utf8", index=False, @@ -94,4 +180,114 @@ def test_df_to_escaped_csv(): ] df = pa.array([1, None]).to_pandas(integer_object_nulls=True).to_frame() - assert csv.df_to_escaped_csv(df, encoding="utf8", index=False) == '0\n1\n""\n' + assert df_to_escaped_csv(df, encoding="utf8", index=False) == '0\n1\n""\n' + + +def test_get_chart_dataframe_returns_none_when_no_content( + monkeypatch: pytest.MonkeyPatch, +): + monkeypatch.setattr(csv, "get_chart_csv_data", fake_get_chart_csv_data_none) + result = get_chart_dataframe("http://dummy-url") + assert result is None + + +def test_get_chart_dataframe_returns_none_for_empty_data( + monkeypatch: pytest.MonkeyPatch, +): + monkeypatch.setattr(csv, "get_chart_csv_data", fake_get_chart_csv_data_empty) + result = get_chart_dataframe("http://dummy-url") + # When data is empty, the function should return None + assert result is None + + +def test_get_chart_dataframe_valid_non_temporal(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(csv, "get_chart_csv_data", fake_get_chart_csv_data_valid) + df = get_chart_dataframe("http://dummy-url") + assert df is not None + + expected_columns = pd.MultiIndex.from_tuples([("col1",), ("col2",)]) + pd.testing.assert_index_equal(df.columns, expected_columns) + + expected_index = pd.MultiIndex.from_tuples([("idx1",), ("idx2",)]) + pd.testing.assert_index_equal(df.index, expected_index) + + pd.testing.assert_series_equal( + df[("col1",)], pd.Series([1, 2], name=("col1",), index=df.index) + ) + pd.testing.assert_series_equal( + df[("col2",)], pd.Series(["a", "b"], name=("col2",), index=df.index) + ) + markdown_str = df.to_markdown() + expected_markdown_str = """ +| | ('col1',) | ('col2',) | +|:----------|------------:|:------------| +| ('idx1',) | 1 | a | +| ('idx2',) | 2 | b | +""" + assert markdown_str.strip() == expected_markdown_str.strip() + + +def test_get_chart_dataframe_valid_temporal(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(csv, "get_chart_csv_data", fake_get_chart_csv_data_temporal) + df = get_chart_dataframe("http://dummy-url") + expected_columns = pd.MultiIndex.from_tuples([("date",), ("val",)]) + assert df is not None + pd.testing.assert_index_equal(df.columns, expected_columns) + + expected_index = pd.MultiIndex.from_tuples([(0,), (1,)]) + pd.testing.assert_index_equal(df.index, expected_index) + + assert is_datetime64_any_dtype(df[("date",)]) + expected_dates = pd.to_datetime([1609459200000, 1612137600000], unit="ms").astype( + "datetime64[ms]" + ) + actual_dates = df[("date",)].reset_index(drop=True) + pd.testing.assert_series_equal( + actual_dates, pd.Series(expected_dates, name=("date",)), check_names=False + ) + pd.testing.assert_series_equal( + df[("val",)], pd.Series([10, 20], name=("val",), index=df.index) + ) + markdown_str = df.to_markdown() + expected_markdown_str = """ +| | ('date',) | ('val',) | +|:-----|:--------------------|-----------:| +| (0,) | 2021-01-01 00:00:00 | 10 | +| (1,) | 2021-02-01 00:00:00 | 20 | +""" + assert markdown_str.strip() == expected_markdown_str.strip() + + +def test_get_chart_dataframe_with_hierarchical_columns(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(csv, "get_chart_csv_data", fake_get_chart_csv_data_hierarchical) + df = get_chart_dataframe("http://dummy-url") + assert df is not None + expected_columns = pd.MultiIndex.from_tuples([("level1", "a")]) + pd.testing.assert_index_equal(df.columns, expected_columns) + + expected_index = pd.MultiIndex.from_tuples([("idx",)] * len(df)) + pd.testing.assert_index_equal(df.index, expected_index) + + pd.testing.assert_series_equal( + df[("level1", "a")], pd.Series([1, 2], name=("level1", "a"), index=df.index) + ) + markdown_str = df.to_markdown() + expected_markdown_str = """ +| | ('level1', 'a') | +|:---------|------------------:| +| ('idx',) | 1 | +| ('idx',) | 2 | +""" + assert markdown_str.strip() == expected_markdown_str.strip() + + +def test_get_chart_dataframe_default_range_index(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(csv, "get_chart_csv_data", fake_get_chart_csv_data_default) + df = get_chart_dataframe("http://dummy-url") + assert df is not None + expected_columns = pd.MultiIndex.from_tuples([("col1",), ("col2",)]) + pd.testing.assert_index_equal(df.columns, expected_columns) + # When indexnames is empty, the DataFrame should have a default RangeIndex + assert isinstance(df.index, pd.RangeIndex) + pd.testing.assert_series_equal(df[("col1",)], pd.Series([1, 2], name=("col1",))) + pd.testing.assert_series_equal(df[("col2",)], pd.Series(["a", "b"], name=("col2",)))
