This is an automated email from the ASF dual-hosted git repository.

colinlee pushed a commit to branch support_dataframe_to_tsfile in repository https://gitbox.apache.org/repos/asf/tsfile.git
commit 56aa10499bdb0ab113dd49390a84853ee1ee1378 Author: ColinLee <[email protected]> AuthorDate: Thu Feb 12 23:16:24 2026 +0800 support time column --- python/tests/resources/README.md | 70 ++++++++++++++++++----------- python/tests/test_load_tsfile_from_iotdb.py | 16 ++++++- python/tests/test_to_tsfile.py | 4 +- python/tsfile/schema.py | 4 +- python/tsfile/utils.py | 9 ++-- 5 files changed, 66 insertions(+), 37 deletions(-) diff --git a/python/tests/resources/README.md b/python/tests/resources/README.md index cd1a2aa0..d5ec82b4 100644 --- a/python/tests/resources/README.md +++ b/python/tests/resources/README.md @@ -287,31 +287,49 @@ Total line number = 40 In `table_with_time_column.tsfile` ``` - time region_id temperature humidity -0 1770729095888 loc 0.1 0.1 -1 1770729096807 loc 0.1 0.1 -2 1770729097233 loc 0.1 0.1 -3 1770729097471 loc 0.1 0.1 -4 1770729097695 loc 0.1 0.1 -5 1770729097910 loc 0.1 0.1 -6 1770729098148 loc 0.1 0.1 -7 1770729098385 loc 0.1 0.1 -8 1770729098599 loc 0.1 0.1 -9 1770729098853 loc 0.1 0.1 -10 1770729099086 loc 0.1 0.1 -11 1770729099327 loc 0.1 0.1 -12 1770729099558 loc 0.1 0.1 -13 1770729099794 loc 0.1 0.1 -14 1770729100017 loc 0.1 0.1 -15 1770729100262 loc 0.1 0.1 -16 1770729100492 loc 0.1 0.1 -17 1770729100729 loc 0.1 0.1 -18 1770729100976 loc 0.1 0.1 -19 1770729101243 loc 0.1 0.1 -20 1770729101494 loc 0.1 0.1 -21 1770729101734 loc 0.1 0.1 -22 1770729102040 loc 0.1 0.1 -23 1770729102333 loc 0.1 0.1 -24 1770729103005 loc 0.1 0.1 +IoTDB:mydb> select * from table2; ++-----------------------------+---------+-----------+--------+ +| id|region_id|temperature|humidity| ++-----------------------------+---------+-----------+--------+ +|2026-02-10T21:11:35.888+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:36.807+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:37.233+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:37.471+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:37.695+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:37.910+08:00| loc| 0.1| 0.1| 
+|2026-02-10T21:11:38.148+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:38.385+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:38.599+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:38.853+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:39.086+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:39.327+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:39.558+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:39.794+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:40.017+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:40.262+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:40.492+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:40.729+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:40.976+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:41.243+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:41.494+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:41.734+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:42.040+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:42.333+08:00| loc| 0.1| 0.1| +|2026-02-10T21:11:43.005+08:00| loc| 0.1| 0.1| ++-----------------------------+---------+-----------+--------+ +Total line number = 25 +It costs 0.042s +IoTDB:mydb> describe table2 ++-----------+---------+--------+ +| ColumnName| DataType|Category| ++-----------+---------+--------+ +| id|TIMESTAMP| TIME| +| region_id| STRING| TAG| +|temperature| FLOAT| FIELD| +| humidity| DOUBLE| FIELD| ++-----------+---------+--------+ +Total line number = 4 +It costs 0.065s +IoTDB:mydb> ``` diff --git a/python/tests/test_load_tsfile_from_iotdb.py b/python/tests/test_load_tsfile_from_iotdb.py index 8dcc0b1c..50ca0baf 100644 --- a/python/tests/test_load_tsfile_from_iotdb.py +++ b/python/tests/test_load_tsfile_from_iotdb.py @@ -111,10 +111,24 @@ def test_load_tsfile_from_iotdb(): assert df["s9"].isna().sum() == 5 ## --------- table_with_time_column_path = os.path.join(dir_path, 'table_with_time_column.tsfile') - df = ts.to_dataframe(table_with_time_column_path) + df = ts.to_dataframe(table_with_time_column_path) + assert list(df.columns)[0] == "id" assert len(df) == 25 assert math.isclose(df["temperature"].sum(), 2.5, rel_tol=1e-9) 
assert math.isclose(df["humidity"].sum(), 2.5, rel_tol=1e-9) assert (df["region_id"] == "loc").sum() == 25 + df = ts.to_dataframe(table_with_time_column_path, table_name="table2", column_names=["region_id", "temperature", "humidity"]) + assert list(df.columns)[0] == "id" + assert len(df) == 25 + assert math.isclose(df["temperature"].sum(), 2.5, rel_tol=1e-9) + assert (df["region_id"] == "loc").sum() == 25 + + df = ts.to_dataframe(table_with_time_column_path, table_name="table2", column_names=["id", "temperature", "humidity"]) + assert list(df.columns)[0] == "time" + assert df["id"].equals(df["time"]) + assert len(df) == 25 + assert math.isclose(df["temperature"].sum(), 2.5, rel_tol=1e-9) + assert math.isclose(df["humidity"].sum(), 2.5, rel_tol=1e-9) + diff --git a/python/tests/test_to_tsfile.py b/python/tests/test_to_tsfile.py index a35d5e89..4e048188 100644 --- a/python/tests/test_to_tsfile.py +++ b/python/tests/test_to_tsfile.py @@ -132,11 +132,11 @@ def test_dataframe_to_tsfile_custom_time_column(): dataframe_to_tsfile(df, tsfile_path, table_name="test_table", time_column="timestamp") df_read = to_dataframe(tsfile_path, table_name="test_table") - df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True) + df_read = df_read.sort_values("timestamp").reset_index(drop=True) df_sorted = convert_to_nullable_types(df.sort_values('timestamp').reset_index(drop=True)) assert df_read.shape == (30, 3) - assert df_read[TIME_COLUMN].equals(df_sorted["timestamp"]) + assert df_read["timestamp"].equals(df_sorted["timestamp"]) assert df_read["device"].equals(df_sorted["device"]) assert df_read["value"].equals(df_sorted["value"]) finally: diff --git a/python/tsfile/schema.py b/python/tsfile/schema.py index d8671a33..c89649bf 100644 --- a/python/tsfile/schema.py +++ b/python/tsfile/schema.py @@ -119,7 +119,7 @@ class TableSchema: self.table_name = table_name.lower() if len(columns) == 0: raise ValueError("Columns cannot be empty") - self.columns = [] + self.columns = 
columns for column in columns: if column.get_category() == ColumnCategory.TIME: if self.time_column is not None: @@ -128,8 +128,6 @@ class TableSchema: f"'{self.time_column.get_column_name()}' and '{column.get_column_name()}'" ) self.time_column = column - else: - self.columns.append(column) def get_table_name(self): return self.table_name diff --git a/python/tsfile/utils.py b/python/tsfile/utils.py index 4366ef5b..6044ddbb 100644 --- a/python/tsfile/utils.py +++ b/python/tsfile/utils.py @@ -115,22 +115,21 @@ def to_dataframe(file_path: str, table_schema = reader.get_all_table_schemas() is_tree_model = len(table_schema) == 0 - + time_column = None if is_tree_model: if _column_names is None: print("columns name is None, return all columns") else: if _table_name is None: - _table_name, columns = next(iter(table_schema.items())) + _table_name, table_schema = next(iter(table_schema.items())) else: _table_name = _table_name.lower() if _table_name.lower() not in table_schema: raise TableNotExistError(_table_name) - columns = table_schema[_table_name] + table_schema = table_schema[_table_name] column_names_in_file = [] - time_column = None - for column in columns: + for column in table_schema.get_columns(): if column.get_category() == ColumnCategory.TIME: time_column = column.get_column_name() else:
