This is an automated email from the ASF dual-hosted git repository.
colinlee pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/develop by this push:
new ebb4d974 Fix empty TAG column result in to_dataframe when querying
table model. (#730)
ebb4d974 is described below
commit ebb4d97477dcb408518fe24bf01b5769ee37e7a3
Author: Colin Lee <[email protected]>
AuthorDate: Wed Feb 25 17:59:44 2026 +0800
Fix empty TAG column result in to_dataframe when querying table model.
(#730)
---
python/tests/test_load_tsfile_from_iotdb.py | 17 +++++++++++++--
python/tsfile/utils.py | 34 ++++++++++++++++++++++-------
2 files changed, 41 insertions(+), 10 deletions(-)
diff --git a/python/tests/test_load_tsfile_from_iotdb.py
b/python/tests/test_load_tsfile_from_iotdb.py
index 50ca0baf..21347c9e 100644
--- a/python/tests/test_load_tsfile_from_iotdb.py
+++ b/python/tests/test_load_tsfile_from_iotdb.py
@@ -51,6 +51,7 @@ def test_load_tsfile_from_iotdb():
(1760106080000 + 1760106109000) * 30 // 2
)
assert df["s0"].isna().sum() == 0
+ df_s0 = df["s0"]
assert df["s1"].isna().sum() == 0
assert df["s2"].isna().sum() == 8
assert df["s3"].isna().sum() == 0
@@ -73,6 +74,12 @@ def test_load_tsfile_from_iotdb():
assert df["s8"].isna().sum() == 0
assert df["s8"].nunique() == 60
assert df["s9"].isna().sum() == 8
+
+ df = ts.to_dataframe(simple_tabl1_path, table_name="test",
column_names=["s0"])
+ assert len(df) == 60
+ assert len(df.columns) == 2
+ assert df["s0"].equals(df_s0)
+
## ---------
simple_tabl2_path = os.path.join(dir_path, 'simple_table_t2.tsfile')
@@ -118,17 +125,23 @@ def test_load_tsfile_from_iotdb():
assert math.isclose(df["temperature"].sum(), 2.5, rel_tol=1e-9)
assert math.isclose(df["humidity"].sum(), 2.5, rel_tol=1e-9)
assert (df["region_id"] == "loc").sum() == 25
+ df_id = df["id"]
- df = ts.to_dataframe(table_with_time_column_path, table_name="table2",
column_names=["region_id", "temperature", "humidity"])
+ df = ts.to_dataframe(table_with_time_column_path, table_name="table2",
+ column_names=["region_id", "temperature", "humidity"])
assert list(df.columns)[0] == "id"
assert len(df) == 25
assert math.isclose(df["temperature"].sum(), 2.5, rel_tol=1e-9)
assert (df["region_id"] == "loc").sum() == 25
- df = ts.to_dataframe(table_with_time_column_path, table_name="table2",
column_names=["id", "temperature", "humidity"])
+ df = ts.to_dataframe(table_with_time_column_path, table_name="table2",
+ column_names=["id", "temperature", "humidity"])
assert list(df.columns)[0] == "time"
assert df["id"].equals(df["time"])
assert len(df) == 25
assert math.isclose(df["temperature"].sum(), 2.5, rel_tol=1e-9)
assert math.isclose(df["humidity"].sum(), 2.5, rel_tol=1e-9)
+ df = ts.to_dataframe(table_with_time_column_path, table_name="table2",
column_names=["id"])
+ assert len(df.columns) == 2
+ assert df_id.equals(df["id"])
diff --git a/python/tsfile/utils.py b/python/tsfile/utils.py
index 6044ddbb..2e5fc05f 100644
--- a/python/tsfile/utils.py
+++ b/python/tsfile/utils.py
@@ -22,7 +22,7 @@ import numpy as np
import pandas as pd
from pandas.core.dtypes.common import is_integer_dtype, is_object_dtype
-from tsfile import ColumnSchema, TableSchema, ColumnCategory, TSDataType
+from tsfile import ColumnSchema, TableSchema, ColumnCategory, TSDataType,
TIME_COLUMN
from tsfile.exceptions import TableNotExistError, ColumnNotExistError
from tsfile.tsfile_reader import TsFileReaderPy
from tsfile.tsfile_table_writer import TsFileTableWriter,
infer_object_column_type, validate_dataframe_for_tsfile
@@ -116,10 +116,16 @@ def to_dataframe(file_path: str,
is_tree_model = len(table_schema) == 0
time_column = None
+ column_name_to_query = []
+ no_field_query = True
if is_tree_model:
if _column_names is None:
print("columns name is None, return all columns")
+ # Querying a table on the tree model currently allows only measurements.
+ no_field_query = False
else:
+ _table_name = _table_name.lower() if _table_name else None
+ _column_names = [column.lower() for column in _column_names]
if _column_names else None
if _table_name is None:
_table_name, table_schema =
next(iter(table_schema.items()))
else:
@@ -137,17 +143,26 @@ def to_dataframe(file_path: str,
if _column_names is not None:
for column in _column_names:
- if column.lower() not in column_names_in_file and
column.lower() != time_column :
+ if column not in column_names_in_file and column !=
time_column:
raise ColumnNotExistError(column)
+ if table_schema.get_column(column).get_category() ==
ColumnCategory.FIELD:
+ no_field_query = False
+ if no_field_query:
+ if time_column is not None:
+ column_name_to_query.append(time_column)
+ column_name_to_query.extend(column_names_in_file)
+ else:
+ column_name_to_query = _column_names
else:
- _column_names = column_names_in_file
+ no_field_query = False
+ column_name_to_query = column_names_in_file
if is_tree_model:
- if _column_names is None:
- _column_names = []
- query_result = reader.query_table_on_tree(_column_names,
_start_time, _end_time)
+ if _column_names is not None:
+ column_name_to_query = _column_names
+ query_result =
reader.query_table_on_tree(column_name_to_query, _start_time, _end_time)
else:
- query_result = reader.query_table(_table_name, _column_names,
_start_time, _end_time)
+ query_result = reader.query_table(_table_name,
column_name_to_query, _start_time, _end_time)
with query_result as result:
while result.next():
@@ -164,8 +179,11 @@ def to_dataframe(file_path: str,
continue
total_rows += len(dataframe)
if time_column is not None:
- if _column_names is None or time_column.lower() not in
[c.lower() for c in _column_names]:
+ if _column_names is None or time_column not in
_column_names:
dataframe =
dataframe.rename(columns={dataframe.columns[0]: time_column})
+ if no_field_query and _column_names is not None:
+ _column_names.insert(0, TIME_COLUMN)
+ dataframe = dataframe[_column_names]
yield dataframe
if (not is_iterator) and max_row_num is not None and
total_rows >= max_row_num:
break