(tsfile) branch support_dataframe_to_tsfile updated: fix sort data.

colinlee Thu, 15 Jan 2026 19:28:42 -0800

This is an automated email from the ASF dual-hosted git repository.

colinlee pushed a commit to branch support_dataframe_to_tsfile
in repository https://gitbox.apache.org/repos/asf/tsfile.git



The following commit(s) were added to refs/heads/support_dataframe_to_tsfile by 
this push:
     new 9e1edf5e fix sort data.
9e1edf5e is described below

commit 9e1edf5ec4c95fa444c52e1d1f626b38ae45d790
Author: ColinLee <[email protected]>
AuthorDate: Fri Jan 16 11:26:31 2026 +0800

    fix sort data.
---
 python/tests/test_to_tsfile.py       | 30 +++++++++++++++++++++++++++++-
 python/tsfile/tsfile_table_writer.py | 20 +++++++++++++++++++-
 python/tsfile/utils.py               |  6 ++++--
 3 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/python/tests/test_to_tsfile.py b/python/tests/test_to_tsfile.py
index 0928f1a9..7c1fb84c 100644
--- a/python/tests/test_to_tsfile.py
+++ b/python/tests/test_to_tsfile.py
@@ -205,7 +205,7 @@ def test_dataframe_to_tsfile_default_table_name():
 
         dataframe_to_tsfile(df, tsfile_path)
 
-        df_read = to_dataframe(tsfile_path, table_name="table")
+        df_read = to_dataframe(tsfile_path, table_name="test_dataframe_to_tsfile_default_name")
         assert df_read.shape == (10, 2)
     finally:
         if os.path.exists(tsfile_path):
@@ -343,3 +343,31 @@ def test_dataframe_to_tsfile_string_vs_blob():
     finally:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
+
+
+def test_dataframe_to_tsfile_tag_time_unsorted():
+    tsfile_path = "test_dataframe_to_tsfile_tag_time_unsorted.tsfile"
+    try:
+        if os.path.exists(tsfile_path):
+            os.remove(tsfile_path)
+
+        df = pd.DataFrame({
+            'time': [30, 10, 20, 50, 40, 15, 25, 35, 5, 45],
+            'device': ['device1', 'device1', 'device1', 'device2', 'device2', 'device1', 'device1', 'device2',
+                       'device1', 'device2'],
+            'value': [i * 1.5 for i in range(10)]
+        })
+
+        dataframe_to_tsfile(df, tsfile_path, table_name="test_table", tag_column=["device"])
+
+        df_read = to_dataframe(tsfile_path, table_name="test_table")
+        df_expected = df.sort_values(by=['device', 'time']).reset_index(drop=True)
+        df_expected = convert_to_nullable_types(df_expected)
+
+        assert df_read.shape == (10, 3)
+        assert df_read["device"].equals(df_expected["device"])
+        assert df_read["time"].equals(df_expected["time"])
+        assert df_read["value"].equals(df_expected["value"])
+    finally:
+        if os.path.exists(tsfile_path):
+            os.remove(tsfile_path)
diff --git a/python/tsfile/tsfile_table_writer.py b/python/tsfile/tsfile_table_writer.py
index 56f9c341..5b33f9b2 100644
--- a/python/tsfile/tsfile_table_writer.py
+++ b/python/tsfile/tsfile_table_writer.py
@@ -18,7 +18,7 @@
 import pandas as pd
 
 from tsfile import TableSchema, Tablet, TableNotExistError
-from tsfile import TsFileWriter
+from tsfile import TsFileWriter, ColumnCategory
 from tsfile.constants import TSDataType
 from tsfile.exceptions import ColumnNotExistError, TypeMismatchError
 
@@ -119,6 +119,24 @@ class TsFileTableWriter:
                 context=f"Type mismatches: {'; '.join(type_mismatches)}"
             )
 
+        tag_columns = []
+        for col in self.tableSchema.get_columns():
+            if col.get_category() == ColumnCategory.TAG:
+                tag_col_name = col.get_column_name()
+                if tag_col_name in df_column_name_map:
+                    tag_columns.append(df_column_name_map[tag_col_name])
+
+        time_column = None
+        for col in dataframe.columns:
+            if col.lower() == 'time':
+                time_column = col
+                break
+
+        if time_column:
+            sort_by = tag_columns.copy()
+            sort_by.append(time_column)
+            dataframe = dataframe.sort_values(by=sort_by)
+
         self.writer.write_dataframe(self.tableSchema.get_table_name(), dataframe)
 
     def close(self):
diff --git a/python/tsfile/utils.py b/python/tsfile/utils.py
index f3c2adc5..567c4fe1 100644
--- a/python/tsfile/utils.py
+++ b/python/tsfile/utils.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 #
+from pathlib import Path
 from typing import Iterator, Union
 from typing import Optional
 
@@ -188,7 +189,7 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
         Path to the TsFile to write. Will be created if it doesn't exist.
 
     table_name : Optional[str], default None
-        Name of the table. If None, defaults to "table".
+        Name of the table. If None, defaults to tsfile file name.
 
     time_column : Optional[str], default None
         Name of the time column. If None, will look for a column named 'time' (case-insensitive),
@@ -211,7 +212,8 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
         raise ValueError("DataFrame cannot be None or empty")
 
     if table_name is None:
-        table_name = "table"
+        filename = Path(file_path).stem
+        table_name = filename
 
     time_col_name = None
     if time_column is not None:

(tsfile) branch support_dataframe_to_tsfile updated: fix sort data.

Reply via email to