This is an automated email from the ASF dual-hosted git repository. colinlee pushed a commit to branch docs_py in repository https://gitbox.apache.org/repos/asf/tsfile.git
commit 5074ed42d57e29829ae8a190d4d50647536f7e9f Author: ColinLee <[email protected]> AuthorDate: Tue Dec 23 11:26:17 2025 +0800 add to_dataframe. --- .../InterfaceDefinition-Python.md | 71 ++++++++++++++++++++++ .../develop/QuickStart/QuickStart-PYTHON.md | 9 +++ .../InterfaceDefinition-Python.md | 70 +++++++++++++++++++++ .../latest/QuickStart/QuickStart-PYTHON.md | 9 +++ .../InterfaceDefinition-Python.md | 69 +++++++++++++++++++++ .../develop/QuickStart/QuickStart-PYTHON.md | 9 +++ 6 files changed, 237 insertions(+) diff --git a/src/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md b/src/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md index 4798efad..ba17129c 100644 --- a/src/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md +++ b/src/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md @@ -280,3 +280,74 @@ class ResultSet: def close(self) ``` + +### to_dataframe + +```python + +def to_dataframe(file_path: str, + table_name: Optional[str] = None, + column_names: Optional[list[str]] = None, + start_time: Optional[int] = None, + end_time: Optional[int] = None, + max_row_num: Optional[int] = None, + as_iterator: bool = False) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]: + + """ + Read data from a TsFile and convert it into a Pandas DataFrame or + an iterator of DataFrames. + + This function supports both table-model and tree-model TsFiles. + Users can filter data by table name, column names, time range, + and maximum number of rows. + + Parameters + ---------- + file_path : str + Path to the TsFile to be read. + + table_name : Optional[str], default None + Name of the table to query in table-model TsFiles. + If None and the file is in table model, the first table + found in the schema will be used. + + column_names : Optional[list[str]], default None + List of column names to query. + - If None, all columns will be returned. 
+ - Column existence will be validated in table-model TsFiles. + + start_time : Optional[int], default None + Start timestamp for the query. + If None, the minimum int64 value is used. + + end_time : Optional[int], default None + End timestamp for the query. + If None, the maximum int64 value is used. + + max_row_num : Optional[int], default None + Maximum number of rows to read. + - If None, all available rows will be returned. + - When `as_iterator` is False, the final DataFrame will be + truncated to this size if necessary. + + as_iterator : bool, default False + Whether to return an iterator of DataFrames instead of + a single concatenated DataFrame. + - True: returns an iterator yielding DataFrames in batches + - False: returns a single Pandas DataFrame + + Returns + ------- + Union[pandas.DataFrame, Iterator[pandas.DataFrame]] + - A Pandas DataFrame if `as_iterator` is False + - An iterator of Pandas DataFrames if `as_iterator` is True + + Raises + ------ + TableNotExistError + If the specified table name does not exist in a table-model TsFile. + + ColumnNotExistError + If any specified column does not exist in the table schema. + """ +``` diff --git a/src/UserGuide/develop/QuickStart/QuickStart-PYTHON.md b/src/UserGuide/develop/QuickStart/QuickStart-PYTHON.md index 30207bb5..e748ed50 100644 --- a/src/UserGuide/develop/QuickStart/QuickStart-PYTHON.md +++ b/src/UserGuide/develop/QuickStart/QuickStart-PYTHON.md @@ -148,6 +148,15 @@ with TsFileReader(table_data_dir) as reader: print(result.read_data_frame()) ``` +Use `to_dataframe` to read tsfile as dataframe. 
+ +```Python +import os +import tsfile as ts +table_data_dir = os.path.join(os.path.dirname(__file__), "table_data.tsfile") +print(ts.to_dataframe(table_data_dir)) +``` + ## Sample Code The sample code of using these interfaces is in:https://github.com/apache/tsfile/blob/develop/python/examples/example.py diff --git a/src/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md b/src/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md index 4798efad..2c0df223 100644 --- a/src/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md +++ b/src/UserGuide/latest/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md @@ -280,3 +280,73 @@ class ResultSet: def close(self) ``` +### to_dataframe + +```python + +def to_dataframe(file_path: str, + table_name: Optional[str] = None, + column_names: Optional[list[str]] = None, + start_time: Optional[int] = None, + end_time: Optional[int] = None, + max_row_num: Optional[int] = None, + as_iterator: bool = False) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]: + + """ + Read data from a TsFile and convert it into a Pandas DataFrame or + an iterator of DataFrames. + + This function supports both table-model and tree-model TsFiles. + Users can filter data by table name, column names, time range, + and maximum number of rows. + + Parameters + ---------- + file_path : str + Path to the TsFile to be read. + + table_name : Optional[str], default None + Name of the table to query in table-model TsFiles. + If None and the file is in table model, the first table + found in the schema will be used. + + column_names : Optional[list[str]], default None + List of column/measurement names to query. + - If None, all columns will be returned. + - Column existence will be validated in table-model TsFiles. + + start_time : Optional[int], default None + Start timestamp for the query. + If None, the minimum int64 value is used. 
+ + end_time : Optional[int], default None + End timestamp for the query. + If None, the maximum int64 value is used. + + max_row_num : Optional[int], default None + Maximum number of rows to read. + - If None, all available rows will be returned. + - When `as_iterator` is False, the final DataFrame will be + truncated to this size if necessary. + + as_iterator : bool, default False + Whether to return an iterator of DataFrames instead of + a single concatenated DataFrame. + - True: returns an iterator yielding DataFrames in batches + - False: returns a single Pandas DataFrame + + Returns + ------- + Union[pandas.DataFrame, Iterator[pandas.DataFrame]] + - A Pandas DataFrame if `as_iterator` is False + - An iterator of Pandas DataFrames if `as_iterator` is True + + Raises + ------ + TableNotExistError + If the specified table name does not exist in a table-model TsFile. + + ColumnNotExistError + If any specified column does not exist in the table schema. + """ +``` \ No newline at end of file diff --git a/src/UserGuide/latest/QuickStart/QuickStart-PYTHON.md b/src/UserGuide/latest/QuickStart/QuickStart-PYTHON.md index 30207bb5..9aa9e571 100644 --- a/src/UserGuide/latest/QuickStart/QuickStart-PYTHON.md +++ b/src/UserGuide/latest/QuickStart/QuickStart-PYTHON.md @@ -148,6 +148,15 @@ with TsFileReader(table_data_dir) as reader: print(result.read_data_frame()) ``` +Use `to_dataframe` to read tsfile as dataframe. 
+ +```Python +import os +import tsfile as ts +table_data_dir = os.path.join(os.path.dirname(__file__), "table_data.tsfile") +print(ts.to_dataframe(table_data_dir)) +``` + ## Sample Code The sample code of using these interfaces is in:https://github.com/apache/tsfile/blob/develop/python/examples/example.py diff --git a/src/zh/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md b/src/zh/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md index 7143a583..b0770cd8 100644 --- a/src/zh/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md +++ b/src/zh/UserGuide/develop/QuickStart/InterfaceDefinition/InterfaceDefinition-Python.md @@ -262,3 +262,72 @@ class ResultSet: ``` +### to_dataframe + +```Python + +def to_dataframe(file_path: str, + table_name: Optional[str] = None, + column_names: Optional[list[str]] = None, + start_time: Optional[int] = None, + end_time: Optional[int] = None, + max_row_num: Optional[int] = None, + as_iterator: bool = False) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]: + """ + 从 TsFile 中读取数据,并将其转换为 Pandas DataFrame + 或 DataFrame 迭代器。 + + 该函数同时支持表模型(table-model)和树模型(tree-model)的 TsFile。 + 用户可以通过表名、列名、时间范围以及最大行数对数据进行过滤。 + + Parameters + ---------- + file_path : str + 要读取的 TsFile 文件路径。 + + table_name : Optional[str], default None + 表模型 TsFile 中要查询的表名。 + 如果为 None 且文件为表模型, + 将使用 schema 中找到的第一个表。 + + column_names : Optional[list[str]], default None + 要查询的列名/测点名列表。 + - 如果为 None,则返回所有列。 + - 在表模型 TsFile 中会校验列是否存在。 + + start_time : Optional[int], default None + 查询的起始时间戳。 + 如果为 None,则使用 int64 的最小值。 + + end_time : Optional[int], default None + 查询的结束时间戳。 + 如果为 None,则使用 int64 的最大值。 + + max_row_num : Optional[int], default None + 读取的最大行数。 + - 如果为 None,则返回所有可用数据。 + - 当 `as_iterator` 为 False 时, + 若结果行数超过该值,DataFrame 将被截断。 + + as_iterator : bool, default False + 是否返回 DataFrame 迭代器,而不是单个合并后的 DataFrame。 + - True:返回按批次生成 DataFrame 的迭代器 + - False:返回单个 Pandas DataFrame + + Returns + ------- 
+ Union[pandas.DataFrame, Iterator[pandas.DataFrame]] + - 当 `as_iterator` 为 False 时,返回 Pandas DataFrame + - 当 `as_iterator` 为 True 时,返回 Pandas DataFrame 迭代器 + + Raises + ------ + TableNotExistError + 当指定的表名在表模型 TsFile 中不存在时抛出。 + + ColumnNotExistError + 当指定的列在表结构中不存在时抛出。 + """ + +``` + diff --git a/src/zh/UserGuide/develop/QuickStart/QuickStart-PYTHON.md b/src/zh/UserGuide/develop/QuickStart/QuickStart-PYTHON.md index b3aad522..543333e2 100644 --- a/src/zh/UserGuide/develop/QuickStart/QuickStart-PYTHON.md +++ b/src/zh/UserGuide/develop/QuickStart/QuickStart-PYTHON.md @@ -149,6 +149,15 @@ with TsFileReader(table_data_dir) as reader: print(result.read_data_frame()) ``` +使用 `to_dataframe` 读取 TsFile 为 DataFrame。 + +```Python +import os +import tsfile as ts +table_data_dir = os.path.join(os.path.dirname(__file__), "table_data.tsfile") +print(ts.to_dataframe(table_data_dir)) +``` + ## 示例代码 使用这些接口的示例代码可以在以下链接中找到:https://github.com/apache/tsfile/blob/develop/python/examples/example.py
