This is an automated email from the ASF dual-hosted git repository.
colinlee pushed a commit to branch support_dataframe_to_tsfile
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/support_dataframe_to_tsfile by
this push:
new d6aa3e15 fix.
d6aa3e15 is described below
commit d6aa3e154321e633428624d60bf75e97749b4e4e
Author: ColinLee <[email protected]>
AuthorDate: Wed Jan 14 22:34:04 2026 +0800
fix.
---
python/lower_case_name.tsfile | Bin 0 -> 23089 bytes
python/record_write_and_read.tsfile | Bin 0 -> 9062 bytes
python/test1.tsfile | Bin 0 -> 23089 bytes
python/tests/test_to_tsfile.py | 106 +++++++++++++++++------------------
python/tsfile/__init__.py | 2 +-
python/tsfile/tsfile_table_writer.py | 10 +++-
python/tsfile/utils.py | 31 ++++------
7 files changed, 75 insertions(+), 74 deletions(-)
diff --git a/python/lower_case_name.tsfile b/python/lower_case_name.tsfile
new file mode 100644
index 00000000..9d570ee0
Binary files /dev/null and b/python/lower_case_name.tsfile differ
diff --git a/python/record_write_and_read.tsfile b/python/record_write_and_read.tsfile
new file mode 100644
index 00000000..4802cc69
Binary files /dev/null and b/python/record_write_and_read.tsfile differ
diff --git a/python/test1.tsfile b/python/test1.tsfile
new file mode 100644
index 00000000..964cc099
Binary files /dev/null and b/python/test1.tsfile differ
diff --git a/python/tests/test_to_tsfile.py b/python/tests/test_to_tsfile.py
index 0e4adcd2..427a4cfd 100644
--- a/python/tests/test_to_tsfile.py
+++ b/python/tests/test_to_tsfile.py
@@ -23,7 +23,7 @@ import pytest
from pandas.core.dtypes.common import is_integer_dtype
from tsfile import to_dataframe, ColumnCategory
-from tsfile.utils import to_tsfile
+from tsfile.utils import dataframe_to_tsfile
def convert_to_nullable_types(df):
@@ -48,9 +48,9 @@ def convert_to_nullable_types(df):
return df
-def test_to_tsfile_basic():
- """Test basic to_tsfile functionality with time column."""
- tsfile_path = "test_to_tsfile_basic.tsfile"
+def test_dataframe_to_tsfile_basic():
+ """Test basic dataframe_to_tsfile functionality with time column."""
+ tsfile_path = "test_dataframe_to_tsfile_basic.tsfile"
try:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
@@ -63,7 +63,7 @@ def test_to_tsfile_basic():
'value2': [i * 10 for i in range(100)]
})
- to_tsfile(df, tsfile_path, table_name="test_table")
+ dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
# Verify by reading back
df_read = to_dataframe(tsfile_path, table_name="test_table")
@@ -80,9 +80,9 @@ def test_to_tsfile_basic():
os.remove(tsfile_path)
-def test_to_tsfile_with_index():
- """Test to_tsfile using DataFrame index as time when no 'time' column exists."""
- tsfile_path = "test_to_tsfile_index.tsfile"
+def test_dataframe_to_tsfile_with_index():
+ """Test dataframe_to_tsfile using DataFrame index as time when no 'time' column exists."""
+ tsfile_path = "test_dataframe_to_tsfile_index.tsfile"
try:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
@@ -94,7 +94,7 @@ def test_to_tsfile_with_index():
})
df.index = [i * 10 for i in range(50)] # Set index as timestamps
- to_tsfile(df, tsfile_path, table_name="test_table")
+ dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
# Verify by reading back
df_read = to_dataframe(tsfile_path, table_name="test_table")
@@ -112,9 +112,9 @@ def test_to_tsfile_with_index():
os.remove(tsfile_path)
-def test_to_tsfile_custom_time_column():
- """Test to_tsfile with custom time column name."""
- tsfile_path = "test_to_tsfile_custom_time.tsfile"
+def test_dataframe_to_tsfile_custom_time_column():
+ """Test dataframe_to_tsfile with custom time column name."""
+ tsfile_path = "test_dataframe_to_tsfile_custom_time.tsfile"
try:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
@@ -125,7 +125,7 @@ def test_to_tsfile_custom_time_column():
'value': [i * 3.0 for i in range(30)]
})
- to_tsfile(df, tsfile_path, table_name="test_table", time_column="timestamp")
+ dataframe_to_tsfile(df, tsfile_path, table_name="test_table", time_column="timestamp")
# Verify by reading back
df_read = to_dataframe(tsfile_path, table_name="test_table")
@@ -141,9 +141,9 @@ def test_to_tsfile_custom_time_column():
os.remove(tsfile_path)
-def test_to_tsfile_with_tag_columns():
- """Test to_tsfile with tag columns specified."""
- tsfile_path = "test_to_tsfile_tags.tsfile"
+def test_dataframe_to_tsfile_with_tag_columns():
+ """Test dataframe_to_tsfile with tag columns specified."""
+ tsfile_path = "test_dataframe_to_tsfile_tags.tsfile"
try:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
@@ -155,7 +155,7 @@ def test_to_tsfile_with_tag_columns():
'value': [i * 1.5 for i in range(20)]
})
- to_tsfile(df, tsfile_path, table_name="test_table", tag_column=["device", "location"])
+ dataframe_to_tsfile(df, tsfile_path, table_name="test_table", tag_column=["device", "location"])
# Verify by reading back
df_read = to_dataframe(tsfile_path, table_name="test_table")
@@ -171,9 +171,9 @@ def test_to_tsfile_with_tag_columns():
os.remove(tsfile_path)
-def test_to_tsfile_all_datatypes():
- """Test to_tsfile with all supported data types."""
- tsfile_path = "test_to_tsfile_all_types.tsfile"
+def test_dataframe_to_tsfile_all_datatypes():
+ """Test dataframe_to_tsfile with all supported data types."""
+ tsfile_path = "test_dataframe_to_tsfile_all_types.tsfile"
try:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
@@ -189,7 +189,7 @@ def test_to_tsfile_all_datatypes():
'blob_col': [f"blob{i}".encode('utf-8') for i in range(50)]
})
- to_tsfile(df, tsfile_path, table_name="test_table")
+ dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
# Verify by reading back
df_read = to_dataframe(tsfile_path, table_name="test_table")
@@ -211,9 +211,9 @@ def test_to_tsfile_all_datatypes():
os.remove(tsfile_path)
-def test_to_tsfile_default_table_name():
- """Test to_tsfile with default table name."""
- tsfile_path = "test_to_tsfile_default_name.tsfile"
+def test_dataframe_to_tsfile_default_table_name():
+ """Test dataframe_to_tsfile with default table name."""
+ tsfile_path = "test_dataframe_to_tsfile_default_name.tsfile"
try:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
@@ -223,7 +223,7 @@ def test_to_tsfile_default_table_name():
'value': [i * 1.0 for i in range(10)]
})
- to_tsfile(df, tsfile_path) # No table_name specified
+ dataframe_to_tsfile(df, tsfile_path) # No table_name specified
# Verify by reading back with default table name
df_read = to_dataframe(tsfile_path, table_name="table")
@@ -233,9 +233,9 @@ def test_to_tsfile_default_table_name():
os.remove(tsfile_path)
-def test_to_tsfile_case_insensitive_time():
- """Test to_tsfile with case-insensitive time column."""
- tsfile_path = "test_to_tsfile_case_time.tsfile"
+def test_dataframe_to_tsfile_case_insensitive_time():
+ """Test dataframe_to_tsfile with case-insensitive time column."""
+ tsfile_path = "test_dataframe_to_tsfile_case_time.tsfile"
try:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
@@ -245,7 +245,7 @@ def test_to_tsfile_case_insensitive_time():
'value': [i * 2.0 for i in range(20)]
})
- to_tsfile(df, tsfile_path, table_name="test_table")
+ dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
# Verify by reading back
df_read = to_dataframe(tsfile_path, table_name="test_table")
@@ -256,9 +256,9 @@ def test_to_tsfile_case_insensitive_time():
os.remove(tsfile_path)
-def test_to_tsfile_empty_dataframe():
- """Test to_tsfile raises error for empty DataFrame."""
- tsfile_path = "test_to_tsfile_empty.tsfile"
+def test_dataframe_to_tsfile_empty_dataframe():
+ """Test dataframe_to_tsfile raises error for empty DataFrame."""
+ tsfile_path = "test_dataframe_to_tsfile_empty.tsfile"
try:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
@@ -266,15 +266,15 @@ def test_to_tsfile_empty_dataframe():
df = pd.DataFrame()
with pytest.raises(ValueError, match="DataFrame cannot be None or empty"):
- to_tsfile(df, tsfile_path)
+ dataframe_to_tsfile(df, tsfile_path)
finally:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
-def test_to_tsfile_no_data_columns():
- """Test to_tsfile raises error when only time column exists."""
- tsfile_path = "test_to_tsfile_no_data.tsfile"
+def test_dataframe_to_tsfile_no_data_columns():
+ """Test dataframe_to_tsfile raises error when only time column exists."""
+ tsfile_path = "test_dataframe_to_tsfile_no_data.tsfile"
try:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
@@ -284,15 +284,15 @@ def test_to_tsfile_no_data_columns():
})
with pytest.raises(ValueError, match="DataFrame must have at least one data column"):
- to_tsfile(df, tsfile_path)
+ dataframe_to_tsfile(df, tsfile_path)
finally:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
-def test_to_tsfile_invalid_time_column():
- """Test to_tsfile raises error for invalid time column."""
- tsfile_path = "test_to_tsfile_invalid_time.tsfile"
+def test_dataframe_to_tsfile_invalid_time_column():
+ """Test dataframe_to_tsfile raises error for invalid time column."""
+ tsfile_path = "test_dataframe_to_tsfile_invalid_time.tsfile"
try:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
@@ -304,15 +304,15 @@ def test_to_tsfile_invalid_time_column():
# Time column doesn't exist
with pytest.raises(ValueError, match="Time column 'time' not found"):
- to_tsfile(df, tsfile_path, time_column="time")
+ dataframe_to_tsfile(df, tsfile_path, time_column="time")
finally:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
-def test_to_tsfile_non_integer_time_column():
- """Test to_tsfile raises error for non-integer time column."""
- tsfile_path = "test_to_tsfile_non_int_time.tsfile"
+def test_dataframe_to_tsfile_non_integer_time_column():
+ """Test dataframe_to_tsfile raises error for non-integer time column."""
+ tsfile_path = "test_dataframe_to_tsfile_non_int_time.tsfile"
try:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
@@ -323,15 +323,15 @@ def test_to_tsfile_non_integer_time_column():
})
with pytest.raises(TypeError, match="must be integer type"):
- to_tsfile(df, tsfile_path)
+ dataframe_to_tsfile(df, tsfile_path)
finally:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
-def test_to_tsfile_invalid_tag_column():
- """Test to_tsfile raises error for invalid tag column."""
- tsfile_path = "test_to_tsfile_invalid_tag.tsfile"
+def test_dataframe_to_tsfile_invalid_tag_column():
+ """Test dataframe_to_tsfile raises error for invalid tag column."""
+ tsfile_path = "test_dataframe_to_tsfile_invalid_tag.tsfile"
try:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
@@ -342,15 +342,15 @@ def test_to_tsfile_invalid_tag_column():
})
with pytest.raises(ValueError, match="Tag column 'invalid' not found"):
- to_tsfile(df, tsfile_path, tag_column=["invalid"])
+ dataframe_to_tsfile(df, tsfile_path, tag_column=["invalid"])
finally:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
-def test_to_tsfile_string_vs_blob():
- """Test to_tsfile correctly distinguishes between STRING and BLOB."""
- tsfile_path = "test_to_tsfile_string_blob.tsfile"
+def test_dataframe_to_tsfile_string_vs_blob():
+ """Test dataframe_to_tsfile correctly distinguishes between STRING and BLOB."""
+ tsfile_path = "test_dataframe_to_tsfile_string_blob.tsfile"
try:
if os.path.exists(tsfile_path):
os.remove(tsfile_path)
@@ -361,7 +361,7 @@ def test_to_tsfile_string_vs_blob():
'blob_col': [f"blob{i}".encode('utf-8') for i in range(20)] # Bytes
})
- to_tsfile(df, tsfile_path, table_name="test_table")
+ dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
# Verify by reading back
df_read = to_dataframe(tsfile_path, table_name="test_table")
diff --git a/python/tsfile/__init__.py b/python/tsfile/__init__.py
index bf755fce..a9237257 100644
--- a/python/tsfile/__init__.py
+++ b/python/tsfile/__init__.py
@@ -34,4 +34,4 @@ from .tsfile_reader import TsFileReaderPy as TsFileReader, ResultSetPy as Result
from .tsfile_writer import TsFileWriterPy as TsFileWriter
from .tsfile_py_cpp import get_tsfile_config, set_tsfile_config
from .tsfile_table_writer import TsFileTableWriter
-from .utils import to_dataframe
\ No newline at end of file
+from .utils import to_dataframe, dataframe_to_tsfile
\ No newline at end of file
diff --git a/python/tsfile/tsfile_table_writer.py b/python/tsfile/tsfile_table_writer.py
index e4867c21..56f9c341 100644
--- a/python/tsfile/tsfile_table_writer.py
+++ b/python/tsfile/tsfile_table_writer.py
@@ -21,7 +21,15 @@ from tsfile import TableSchema, Tablet, TableNotExistError
from tsfile import TsFileWriter
from tsfile.constants import TSDataType
from tsfile.exceptions import ColumnNotExistError, TypeMismatchError
-from tsfile.utils import check_string_or_blob
+
+def check_string_or_blob(ts_data_type: TSDataType, dtype, column_series: pd.Series) -> TSDataType:
+ if ts_data_type == TSDataType.STRING and (dtype == 'object' or str(dtype) == "<class 'numpy.object_'>"):
+ first_valid_idx = column_series.first_valid_index()
+ if first_valid_idx is not None:
+ first_value = column_series[first_valid_idx]
+ if isinstance(first_value, bytes):
+ return TSDataType.BLOB
+ return ts_data_type
class TsFileTableWriter:
diff --git a/python/tsfile/utils.py b/python/tsfile/utils.py
index d2c19428..f3c2adc5 100644
--- a/python/tsfile/utils.py
+++ b/python/tsfile/utils.py
@@ -22,19 +22,10 @@ import numpy as np
import pandas as pd
from pandas.core.dtypes.common import is_integer_dtype
+from tsfile import ColumnSchema, TableSchema, ColumnCategory, TSDataType
from tsfile.exceptions import TableNotExistError, ColumnNotExistError
from tsfile.tsfile_reader import TsFileReaderPy
-from tsfile import ColumnSchema, TableSchema, ColumnCategory, TSDataType, TsFileTableWriter
-
-
-def check_string_or_blob(ts_data_type: TSDataType, dtype, column_series: pd.Series) -> TSDataType:
- if ts_data_type == TSDataType.STRING and (dtype == 'object' or str(dtype) == "<class 'numpy.object_'>"):
- first_valid_idx = column_series.first_valid_index()
- if first_valid_idx is not None:
- first_value = column_series[first_valid_idx]
- if isinstance(first_value, bytes):
- return TSDataType.BLOB
- return ts_data_type
+from tsfile.tsfile_table_writer import TsFileTableWriter, check_string_or_blob
def to_dataframe(file_path: str,
@@ -174,11 +165,11 @@ def to_dataframe(file_path: str,
def dataframe_to_tsfile(dataframe: pd.DataFrame,
- file_path: str,
- table_name: Optional[str] = None,
- time_column: Optional[str] = None,
- tag_column: Optional[list[str]] = None,
- ):
+ file_path: str,
+ table_name: Optional[str] = None,
+ time_column: Optional[str] = None,
+ tag_column: Optional[list[str]] = None,
+ ):
"""
Write a pandas DataFrame to a TsFile by inferring the table schema from
the DataFrame.
@@ -227,7 +218,8 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
if time_column not in dataframe.columns:
raise ValueError(f"Time column '{time_column}' not found in DataFrame")
if not is_integer_dtype(dataframe[time_column].dtype):
- raise TypeError(f"Time column '{time_column}' must be integer type (int64 or int), got {dataframe[time_column].dtype}")
+ raise TypeError(
+ f"Time column '{time_column}' must be integer type (int64 or int), got {dataframe[time_column].dtype}")
time_col_name = time_column
else:
for col in dataframe.columns:
@@ -236,7 +228,8 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
time_col_name = col
break
else:
- raise TypeError(f"Time column '{col}' must be integer type (int64 or int), got {dataframe[col].dtype}")
+ raise TypeError(
+ f"Time column '{col}' must be integer type (int64 or int), got {dataframe[col].dtype}")
data_columns = [col for col in dataframe.columns if col != time_col_name]
@@ -271,4 +264,4 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
df_to_write = dataframe
with TsFileTableWriter(file_path, table_schema) as writer:
- writer.write_dataframe(df_to_write)
\ No newline at end of file
+ writer.write_dataframe(df_to_write)