This is an automated email from the ASF dual-hosted git repository.
colinlee pushed a commit to branch support_dataframe_to_tsfile
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/support_dataframe_to_tsfile by
this push:
new 9e1edf5e fix sort data.
9e1edf5e is described below
commit 9e1edf5ec4c95fa444c52e1d1f626b38ae45d790
Author: ColinLee <[email protected]>
AuthorDate: Fri Jan 16 11:26:31 2026 +0800
fix sort data.
---
python/tests/test_to_tsfile.py | 30 +++++++++++++++++++++++++++++-
python/tsfile/tsfile_table_writer.py | 20 +++++++++++++++++++-
python/tsfile/utils.py | 6 ++++--
3 files changed, 52 insertions(+), 4 deletions(-)
diff --git a/python/tests/test_to_tsfile.py b/python/tests/test_to_tsfile.py
index 0928f1a9..7c1fb84c 100644
--- a/python/tests/test_to_tsfile.py
+++ b/python/tests/test_to_tsfile.py
@@ -205,7 +205,7 @@ def test_dataframe_to_tsfile_default_table_name():
dataframe_to_tsfile(df, tsfile_path)
- df_read = to_dataframe(tsfile_path, table_name="table")
+ df_read = to_dataframe(tsfile_path, table_name="test_dataframe_to_tsfile_default_name")
assert df_read.shape == (10, 2)
finally:
if os.path.exists(tsfile_path):
@@ -343,3 +343,31 @@ def test_dataframe_to_tsfile_string_vs_blob():
finally:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
+
+
+def test_dataframe_to_tsfile_tag_time_unsorted():
+ tsfile_path = "test_dataframe_to_tsfile_tag_time_unsorted.tsfile"
+ try:
+ if os.path.exists(tsfile_path):
+ os.remove(tsfile_path)
+
+ df = pd.DataFrame({
+ 'time': [30, 10, 20, 50, 40, 15, 25, 35, 5, 45],
+ 'device': ['device1', 'device1', 'device1', 'device2', 'device2', 'device1', 'device1', 'device2',
+ 'device1', 'device2'],
+ 'value': [i * 1.5 for i in range(10)]
+ })
+
+ dataframe_to_tsfile(df, tsfile_path, table_name="test_table", tag_column=["device"])
+
+ df_read = to_dataframe(tsfile_path, table_name="test_table")
+ df_expected = df.sort_values(by=['device', 'time']).reset_index(drop=True)
+ df_expected = convert_to_nullable_types(df_expected)
+
+ assert df_read.shape == (10, 3)
+ assert df_read["device"].equals(df_expected["device"])
+ assert df_read["time"].equals(df_expected["time"])
+ assert df_read["value"].equals(df_expected["value"])
+ finally:
+ if os.path.exists(tsfile_path):
+ os.remove(tsfile_path)
diff --git a/python/tsfile/tsfile_table_writer.py b/python/tsfile/tsfile_table_writer.py
index 56f9c341..5b33f9b2 100644
--- a/python/tsfile/tsfile_table_writer.py
+++ b/python/tsfile/tsfile_table_writer.py
@@ -18,7 +18,7 @@
import pandas as pd
from tsfile import TableSchema, Tablet, TableNotExistError
-from tsfile import TsFileWriter
+from tsfile import TsFileWriter, ColumnCategory
from tsfile.constants import TSDataType
from tsfile.exceptions import ColumnNotExistError, TypeMismatchError
@@ -119,6 +119,24 @@ class TsFileTableWriter:
context=f"Type mismatches: {'; '.join(type_mismatches)}"
)
+ tag_columns = []
+ for col in self.tableSchema.get_columns():
+ if col.get_category() == ColumnCategory.TAG:
+ tag_col_name = col.get_column_name()
+ if tag_col_name in df_column_name_map:
+ tag_columns.append(df_column_name_map[tag_col_name])
+
+ time_column = None
+ for col in dataframe.columns:
+ if col.lower() == 'time':
+ time_column = col
+ break
+
+ if time_column:
+ sort_by = tag_columns.copy()
+ sort_by.append(time_column)
+ dataframe = dataframe.sort_values(by=sort_by)
+
self.writer.write_dataframe(self.tableSchema.get_table_name(), dataframe)
def close(self):
diff --git a/python/tsfile/utils.py b/python/tsfile/utils.py
index f3c2adc5..567c4fe1 100644
--- a/python/tsfile/utils.py
+++ b/python/tsfile/utils.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
#
+from pathlib import Path
from typing import Iterator, Union
from typing import Optional
@@ -188,7 +189,7 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
Path to the TsFile to write. Will be created if it doesn't exist.
table_name : Optional[str], default None
- Name of the table. If None, defaults to "table".
+ Name of the table. If None, defaults to tsfile file name.
time_column : Optional[str], default None
Name of the time column. If None, will look for a column named 'time'
(case-insensitive),
@@ -211,7 +212,8 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
raise ValueError("DataFrame cannot be None or empty")
if table_name is None:
- table_name = "table"
+ filename = Path(file_path).stem
+ table_name = filename
time_col_name = None
if time_column is not None: