(tsfile) branch develop updated: Fix empty TAG column result in to_dataframe when querying table model. (#730)

colinlee Wed, 25 Feb 2026 01:59:57 -0800

This is an automated email from the ASF dual-hosted git repository.

colinlee pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git



The following commit(s) were added to refs/heads/develop by this push:
     new ebb4d974 Fix empty TAG column result in to_dataframe when querying table model. (#730)
ebb4d974 is described below

commit ebb4d97477dcb408518fe24bf01b5769ee37e7a3
Author: Colin Lee <[email protected]>
AuthorDate: Wed Feb 25 17:59:44 2026 +0800

    Fix empty TAG column result in to_dataframe when querying table model. (#730)
---
 python/tests/test_load_tsfile_from_iotdb.py | 17 +++++++++++++--
 python/tsfile/utils.py                      | 34 ++++++++++++++++++++++-------
 2 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/python/tests/test_load_tsfile_from_iotdb.py b/python/tests/test_load_tsfile_from_iotdb.py
index 50ca0baf..21347c9e 100644
--- a/python/tests/test_load_tsfile_from_iotdb.py
+++ b/python/tests/test_load_tsfile_from_iotdb.py
@@ -51,6 +51,7 @@ def test_load_tsfile_from_iotdb():
             (1760106080000 + 1760106109000) * 30 // 2
     )
     assert df["s0"].isna().sum() == 0
+    df_s0 = df["s0"]
     assert df["s1"].isna().sum() == 0
     assert df["s2"].isna().sum() == 8
     assert df["s3"].isna().sum() == 0
@@ -73,6 +74,12 @@ def test_load_tsfile_from_iotdb():
     assert df["s8"].isna().sum() == 0
     assert df["s8"].nunique() == 60
     assert df["s9"].isna().sum() == 8
+
+    df = ts.to_dataframe(simple_tabl1_path, table_name="test", column_names=["s0"])
+    assert len(df) == 60
+    assert len(df.columns) == 2
+    assert df["s0"].equals(df_s0)
+
     ## ---------
 
     simple_tabl2_path = os.path.join(dir_path, 'simple_table_t2.tsfile')
@@ -118,17 +125,23 @@ def test_load_tsfile_from_iotdb():
     assert math.isclose(df["temperature"].sum(), 2.5, rel_tol=1e-9)
     assert math.isclose(df["humidity"].sum(), 2.5, rel_tol=1e-9)
     assert (df["region_id"] == "loc").sum() == 25
+    df_id = df["id"]
 
-    df = ts.to_dataframe(table_with_time_column_path, table_name="table2", column_names=["region_id", "temperature", "humidity"])
+    df = ts.to_dataframe(table_with_time_column_path, table_name="table2",
+                         column_names=["region_id", "temperature", "humidity"])
     assert list(df.columns)[0] == "id"
     assert len(df) == 25
     assert math.isclose(df["temperature"].sum(), 2.5, rel_tol=1e-9)
     assert (df["region_id"] == "loc").sum() == 25
 
-    df = ts.to_dataframe(table_with_time_column_path, table_name="table2", column_names=["id", "temperature", "humidity"])
+    df = ts.to_dataframe(table_with_time_column_path, table_name="table2",
+                         column_names=["id", "temperature", "humidity"])
     assert list(df.columns)[0] == "time"
     assert df["id"].equals(df["time"])
     assert len(df) == 25
     assert math.isclose(df["temperature"].sum(), 2.5, rel_tol=1e-9)
     assert math.isclose(df["humidity"].sum(), 2.5, rel_tol=1e-9)
 
+    df = ts.to_dataframe(table_with_time_column_path, table_name="table2", column_names=["id"])
+    assert len(df.columns) == 2
+    assert df_id.equals(df["id"])
diff --git a/python/tsfile/utils.py b/python/tsfile/utils.py
index 6044ddbb..2e5fc05f 100644
--- a/python/tsfile/utils.py
+++ b/python/tsfile/utils.py
@@ -22,7 +22,7 @@ import numpy as np
 import pandas as pd
 from pandas.core.dtypes.common import is_integer_dtype, is_object_dtype
 
-from tsfile import ColumnSchema, TableSchema, ColumnCategory, TSDataType
+from tsfile import ColumnSchema, TableSchema, ColumnCategory, TSDataType, TIME_COLUMN
 from tsfile.exceptions import TableNotExistError, ColumnNotExistError
 from tsfile.tsfile_reader import TsFileReaderPy
 from tsfile.tsfile_table_writer import TsFileTableWriter, infer_object_column_type, validate_dataframe_for_tsfile
@@ -116,10 +116,16 @@ def to_dataframe(file_path: str,
 
             is_tree_model = len(table_schema) == 0
             time_column = None
+            column_name_to_query = []
+            no_field_query = True
             if is_tree_model:
                 if _column_names is None:
                     print("columns name is None, return all columns")
+                # When querying tables in the tree, only measurements are allowed currently.
+                no_field_query = False
             else:
+                _table_name = _table_name.lower() if _table_name else None
+                _column_names = [column.lower() for column in _column_names] if _column_names else None
                 if _table_name is None:
                     _table_name, table_schema = next(iter(table_schema.items()))
                 else:
@@ -137,17 +143,26 @@ def to_dataframe(file_path: str,
 
                 if _column_names is not None:
                     for column in _column_names:
-                        if column.lower() not in column_names_in_file and column.lower() != time_column :
+                        if column not in column_names_in_file and column != time_column:
                             raise ColumnNotExistError(column)
+                        if table_schema.get_column(column).get_category() == ColumnCategory.FIELD:
+                            no_field_query = False
+                    if no_field_query:
+                        if time_column is not None:
+                            column_name_to_query.append(time_column)
+                        column_name_to_query.extend(column_names_in_file)
+                    else:
+                        column_name_to_query = _column_names
                 else:
-                    _column_names = column_names_in_file
+                    no_field_query = False
+                    column_name_to_query = column_names_in_file
 
             if is_tree_model:
-                if _column_names is None:
-                    _column_names = []
-                query_result = reader.query_table_on_tree(_column_names, _start_time, _end_time)
+                if _column_names is not None:
+                    column_name_to_query = _column_names
+                query_result = reader.query_table_on_tree(column_name_to_query, _start_time, _end_time)
             else:
-                query_result = reader.query_table(_table_name, _column_names, _start_time, _end_time)
+                query_result = reader.query_table(_table_name, column_name_to_query, _start_time, _end_time)
 
             with query_result as result:
                 while result.next():
@@ -164,8 +179,11 @@ def to_dataframe(file_path: str,
                         continue
                     total_rows += len(dataframe)
                     if time_column is not None:
-                        if _column_names is None or time_column.lower() not in [c.lower() for c in _column_names]:
+                        if _column_names is None or time_column not in _column_names:
                             dataframe = dataframe.rename(columns={dataframe.columns[0]: time_column})
+                    if no_field_query and _column_names is not None:
+                        _column_names.insert(0, TIME_COLUMN)
+                        dataframe = dataframe[_column_names]
                     yield dataframe
                     if (not is_iterator) and max_row_num is not None and total_rows >= max_row_num:
                         break

(tsfile) branch develop updated: Fix empty TAG column result in to_dataframe when querying table model. (#730)

Reply via email to