(superset) 01/01: add more csv tests

elizabeth Thu, 13 Mar 2025 17:09:58 -0700

This is an automated email from the ASF dual-hosted git repository.

elizabeth pushed a commit to branch elizabeth/csv-tests
in repository https://gitbox.apache.org/repos/asf/superset.git


commit c48d1c9ab1d560460482d866c65a1393f38c4181
Author: Elizabeth Thompson <[email protected]>
AuthorDate: Thu Mar 13 17:09:27 2025 -0700

    add more csv tests
---
 tests/unit_tests/charts/test_client_processing.py |  91 ++++++++++
 tests/unit_tests/utils/csv_tests.py               | 208 +++++++++++++++++++++-
 2 files changed, 293 insertions(+), 6 deletions(-)

diff --git a/tests/unit_tests/charts/test_client_processing.py b/tests/unit_tests/charts/test_client_processing.py
index b38513a2e3..cecc43c45b 100644
--- a/tests/unit_tests/charts/test_client_processing.py
+++ b/tests/unit_tests/charts/test_client_processing.py
@@ -2060,6 +2060,97 @@ COUNT(is_software_dev)
     }
 
 
+def test_apply_client_processing_csv_format_simple_table():
+    """
+    It should be able to process csv results
+    And not show a default column
+    """
+
+    result = {
+        "queries": [
+            {
+                "result_format": ChartDataResultFormat.CSV,
+                "data": """
+COUNT(is_software_dev)
+4725
+""",
+            }
+        ]
+    }
+    form_data = {
+        "datasource": "19__table",
+        "viz_type": "table",
+        "slice_id": 69,
+        "url_params": {},
+        "granularity_sqla": "time_start",
+        "time_grain_sqla": "P1D",
+        "time_range": "No filter",
+        "groupbyColumns": [],
+        "groupbyRows": [],
+        "metrics": [
+            {
+                "aggregate": "COUNT",
+                "column": {
+                    "column_name": "is_software_dev",
+                    "description": None,
+                    "expression": None,
+                    "filterable": True,
+                    "groupby": True,
+                    "id": 1463,
+                    "is_dttm": False,
+                    "python_date_format": None,
+                    "type": "DOUBLE PRECISION",
+                    "verbose_name": None,
+                },
+                "expressionType": "SIMPLE",
+                "hasCustomLabel": False,
+                "isNew": False,
+                "label": "COUNT(is_software_dev)",
+                "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
+                "sqlExpression": None,
+            }
+        ],
+        "metricsLayout": "COLUMNS",
+        "adhoc_filters": [
+            {
+                "clause": "WHERE",
+                "comparator": "Currently A Developer",
+                "expressionType": "SIMPLE",
+                "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
+                "isExtra": False,
+                "isNew": False,
+                "operator": "==",
+                "sqlExpression": None,
+                "subject": "developer_type",
+            }
+        ],
+        "row_limit": 10000,
+        "order_desc": True,
+        "aggregateFunction": "Sum",
+        "valueFormat": "SMART_NUMBER",
+        "date_format": "smart_date",
+        "rowOrder": "key_a_to_z",
+        "colOrder": "key_a_to_z",
+        "extra_form_data": {},
+        "force": False,
+        "result_format": "json",
+        "result_type": "results",
+    }
+
+    assert apply_client_processing(result, form_data) == {
+        "queries": [
+            {
+                "result_format": ChartDataResultFormat.CSV,
+                "data": "COUNT(is_software_dev)\n4725\n",
+                "colnames": ["COUNT(is_software_dev)"],
+                "indexnames": [0],
+                "coltypes": [GenericDataType.NUMERIC],
+                "rowcount": 1,
+            }
+        ]
+    }
+
+
 def test_apply_client_processing_csv_format_empty_string():
     """
     It should be able to process csv results with no data
diff --git a/tests/unit_tests/utils/csv_tests.py b/tests/unit_tests/utils/csv_tests.py
index fa2c79eb88..1ba03d2029 100644
--- a/tests/unit_tests/utils/csv_tests.py
+++ b/tests/unit_tests/utils/csv_tests.py
@@ -15,19 +15,27 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import json
+
 import pandas as pd
 import pyarrow as pa
 import pytest  # noqa: F401
+from pandas.api.types import is_datetime64_any_dtype
 
 from superset.utils import csv
+from superset.utils.core import GenericDataType
+from superset.utils.csv import (
+    df_to_escaped_csv,
+    get_chart_dataframe,
+)
 
 
 def test_escape_value():
     result = csv.escape_value("value")
     assert result == "value"
 
-    result = csv.escape_value("-10")
-    assert result == "-10"
+    result = csv.escape_value("10")
+    assert result == "10"
 
     result = csv.escape_value("@value")
     assert result == "'@value"
@@ -35,8 +43,8 @@ def test_escape_value():
     result = csv.escape_value("+value")
     assert result == "'+value"
 
-    result = csv.escape_value("-value")
-    assert result == "'-value"
+    result = csv.escape_value("value")
+    assert result == "'value"
 
     result = csv.escape_value("=value")
     assert result == "'=value"
@@ -57,6 +65,84 @@ def test_escape_value():
     assert result == "' =10+2"
 
 
+def fake_get_chart_csv_data_none(chart_url, auth_cookies=None):
+    return None
+
+
+def fake_get_chart_csv_data_empty(chart_url, auth_cookies=None):
+    # Return JSON with empty data so that the resulting DataFrame is empty
+    fake_result = {
+        "result": [{"data": {}, "coltypes": [], "colnames": [], "indexnames": []}]
+    }
+    return json.dumps(fake_result).encode("utf-8")
+
+
+def fake_get_chart_csv_data_valid(chart_url, auth_cookies=None):
+    # Return JSON with non-temporal data and valid indexnames so that they are used.
+    fake_result = {
+        "result": [
+            {
+                "data": {"col1": [1, 2], "col2": ["a", "b"]},
+                "coltypes": [GenericDataType.NUMERIC, GenericDataType.STRING],
+                "colnames": ["col1", "col2"],
+                # Provide two index names so that a MultiIndex is built.
+                "indexnames": ["idx1", "idx2"],
+            }
+        ]
+    }
+    return json.dumps(fake_result).encode("utf-8")
+
+
+def fake_get_chart_csv_data_temporal(chart_url, auth_cookies=None):
+    """
+    Return JSON with a temporal column and valid indexnames
+    so that a MultiIndex is built.
+    """
+    fake_result = {
+        "result": [
+            {
+                "data": {"date": [1609459200000, 1612137600000], "val": [10, 20]},
+                "coltypes": [GenericDataType.TEMPORAL, GenericDataType.NUMERIC],
+                "colnames": ["date", "val"],
+                # Provide two index names so a MultiIndex is built.
+                "indexnames": [0, 1],
+            }
+        ]
+    }
+    return json.dumps(fake_result).encode("utf-8")
+
+
+def fake_get_chart_csv_data_hierarchical(chart_url, auth_cookies=None):
+    # Return JSON with hierarchical column (list-based) and matching index names.
+    fake_result = {
+        "result": [
+            {
+                "data": {"a": [1, 2]},
+                "coltypes": [GenericDataType.NUMERIC],
+                "colnames": [["level1", "a"]],
+                # Provide two index tuples for two rows
+                "indexnames": [["idx"], ["idx"]],
+            }
+        ]
+    }
+    return json.dumps(fake_result).encode("utf-8")
+
+
+def fake_get_chart_csv_data_default(chart_url, auth_cookies=None):
+    # When indexnames is empty, pandas should fall back to a default RangeIndex
+    fake_result = {
+        "result": [
+            {
+                "data": {"col1": [1, 2], "col2": ["a", "b"]},
+                "coltypes": [GenericDataType.NUMERIC, GenericDataType.STRING],
+                "colnames": ["col1", "col2"],
+                "indexnames": None,
+            }
+        ]
+    }
+    return json.dumps(fake_result).encode("utf-8")
+
+
 def test_df_to_escaped_csv():
     df = pd.DataFrame(
         data={
@@ -73,7 +159,7 @@ def test_df_to_escaped_csv():
         }
     )
 
-    escaped_csv_str = csv.df_to_escaped_csv(
+    escaped_csv_str = df_to_escaped_csv(
         df,
         encoding="utf8",
         index=False,
@@ -94,4 +180,114 @@ def test_df_to_escaped_csv():
     ]
 
     df = pa.array([1, None]).to_pandas(integer_object_nulls=True).to_frame()
-    assert csv.df_to_escaped_csv(df, encoding="utf8", index=False) == '0\n1\n""\n'
+    assert df_to_escaped_csv(df, encoding="utf8", index=False) == '0\n1\n""\n'
+
+
+def test_get_chart_dataframe_returns_none_when_no_content(
+    monkeypatch: pytest.MonkeyPatch,
+):
+    monkeypatch.setattr(csv, "get_chart_csv_data", fake_get_chart_csv_data_none)
+    result = get_chart_dataframe("http://dummy-url")
+    assert result is None
+
+
+def test_get_chart_dataframe_returns_none_for_empty_data(
+    monkeypatch: pytest.MonkeyPatch,
+):
+    monkeypatch.setattr(csv, "get_chart_csv_data", fake_get_chart_csv_data_empty)
+    result = get_chart_dataframe("http://dummy-url")
+    # When data is empty, the function should return None
+    assert result is None
+
+
+def test_get_chart_dataframe_valid_non_temporal(monkeypatch: pytest.MonkeyPatch):
+    monkeypatch.setattr(csv, "get_chart_csv_data", fake_get_chart_csv_data_valid)
+    df = get_chart_dataframe("http://dummy-url")
+    assert df is not None
+
+    expected_columns = pd.MultiIndex.from_tuples([("col1",), ("col2",)])
+    pd.testing.assert_index_equal(df.columns, expected_columns)
+
+    expected_index = pd.MultiIndex.from_tuples([("idx1",), ("idx2",)])
+    pd.testing.assert_index_equal(df.index, expected_index)
+
+    pd.testing.assert_series_equal(
+        df[("col1",)], pd.Series([1, 2], name=("col1",), index=df.index)
+    )
+    pd.testing.assert_series_equal(
+        df[("col2",)], pd.Series(["a", "b"], name=("col2",), index=df.index)
+    )
+    markdown_str = df.to_markdown()
+    expected_markdown_str = """
+|           |   ('col1',) | ('col2',)   |
+|:----------|------------:|:------------|
+| ('idx1',) |           1 | a           |
+| ('idx2',) |           2 | b           |
+"""
+    assert markdown_str.strip() == expected_markdown_str.strip()
+
+
+def test_get_chart_dataframe_valid_temporal(monkeypatch: pytest.MonkeyPatch):
+    monkeypatch.setattr(csv, "get_chart_csv_data", fake_get_chart_csv_data_temporal)
+    df = get_chart_dataframe("http://dummy-url")
+    expected_columns = pd.MultiIndex.from_tuples([("date",), ("val",)])
+    assert df is not None
+    pd.testing.assert_index_equal(df.columns, expected_columns)
+
+    expected_index = pd.MultiIndex.from_tuples([(0,), (1,)])
+    pd.testing.assert_index_equal(df.index, expected_index)
+
+    assert is_datetime64_any_dtype(df[("date",)])
+    expected_dates = pd.to_datetime([1609459200000, 1612137600000], unit="ms").astype(
+        "datetime64[ms]"
+    )
+    actual_dates = df[("date",)].reset_index(drop=True)
+    pd.testing.assert_series_equal(
+        actual_dates, pd.Series(expected_dates, name=("date",)), check_names=False
+    )
+    pd.testing.assert_series_equal(
+        df[("val",)], pd.Series([10, 20], name=("val",), index=df.index)
+    )
+    markdown_str = df.to_markdown()
+    expected_markdown_str = """
+|      | ('date',)           |   ('val',) |
+|:-----|:--------------------|-----------:|
+| (0,) | 2021-01-01 00:00:00 |         10 |
+| (1,) | 2021-02-01 00:00:00 |         20 |
+"""
+    assert markdown_str.strip() == expected_markdown_str.strip()
+
+
+def test_get_chart_dataframe_with_hierarchical_columns(monkeypatch: pytest.MonkeyPatch):
+    monkeypatch.setattr(csv, "get_chart_csv_data", fake_get_chart_csv_data_hierarchical)
+    df = get_chart_dataframe("http://dummy-url")
+    assert df is not None
+    expected_columns = pd.MultiIndex.from_tuples([("level1", "a")])
+    pd.testing.assert_index_equal(df.columns, expected_columns)
+
+    expected_index = pd.MultiIndex.from_tuples([("idx",)] * len(df))
+    pd.testing.assert_index_equal(df.index, expected_index)
+
+    pd.testing.assert_series_equal(
+        df[("level1", "a")], pd.Series([1, 2], name=("level1", "a"), index=df.index)
+    )
+    markdown_str = df.to_markdown()
+    expected_markdown_str = """
+|          |   ('level1', 'a') |
+|:---------|------------------:|
+| ('idx',) |                 1 |
+| ('idx',) |                 2 |
+"""
+    assert markdown_str.strip() == expected_markdown_str.strip()
+
+
+def test_get_chart_dataframe_default_range_index(monkeypatch: pytest.MonkeyPatch):
+    monkeypatch.setattr(csv, "get_chart_csv_data", fake_get_chart_csv_data_default)
+    df = get_chart_dataframe("http://dummy-url")
+    assert df is not None
+    expected_columns = pd.MultiIndex.from_tuples([("col1",), ("col2",)])
+    pd.testing.assert_index_equal(df.columns, expected_columns)
+    # When indexnames is empty, the DataFrame should have a default RangeIndex
+    assert isinstance(df.index, pd.RangeIndex)
+    pd.testing.assert_series_equal(df[("col1",)], pd.Series([1, 2], name=("col1",)))
+    pd.testing.assert_series_equal(df[("col2",)], pd.Series(["a", "b"], name=("col2",)))

(superset) 01/01: add more csv tests

Reply via email to