This is an automated email from the ASF dual-hosted git repository. colinlee pushed a commit to branch colin_fix_datatype_mismatch in repository https://gitbox.apache.org/repos/asf/tsfile.git
commit 8710db83cf5ba2de6442eeb628ee5206f620ce92 Author: ColinLee <[email protected]> AuthorDate: Tue Mar 18 19:15:25 2025 +0800 fix column datatype mismatch --- cpp/src/reader/table_query_executor.cc | 8 ++- cpp/src/reader/table_result_set.h | 12 +++-- python/examples/example.py | 94 ++++++++-------------------------- python/tsfile/tsfile_reader.pyx | 3 +- 4 files changed, 37 insertions(+), 80 deletions(-) diff --git a/cpp/src/reader/table_query_executor.cc b/cpp/src/reader/table_query_executor.cc index c46afdf4..c097c360 100644 --- a/cpp/src/reader/table_query_executor.cc +++ b/cpp/src/reader/table_query_executor.cc @@ -45,6 +45,12 @@ int TableQueryExecutor::query(const std::string &table_name, for (size_t i = 0; i < columns.size(); ++i) { column_mapping->add(columns[i], static_cast<int>(i), *table_schema); } + std::vector<common::TSDataType> data_types; + data_types.reserve(columns.size()); + for (size_t i = 0; i < columns.size(); ++i) { + auto ind = table_schema->find_column_index(columns[i]); + data_types.push_back(table_schema->get_data_types()[ind]); + } // column_mapping.add(*measurement_filter); auto device_task_iterator = std::unique_ptr<DeviceTaskIterator>( @@ -65,7 +71,7 @@ int TableQueryExecutor::query(const std::string &table_name, } assert(tsblock_reader != nullptr); ret_qds = new TableResultSet(std::move(tsblock_reader), columns, - table_schema->get_data_types()); + data_types); return ret; } diff --git a/cpp/src/reader/table_result_set.h b/cpp/src/reader/table_result_set.h index b0da4019..71e57c47 100644 --- a/cpp/src/reader/table_result_set.h +++ b/cpp/src/reader/table_result_set.h @@ -27,17 +27,21 @@ namespace storage { class TableResultSet : public ResultSet { public: explicit TableResultSet(std::unique_ptr<TsBlockReader> tsblock_reader, - std::vector<std::string> column_names, - std::vector<common::TSDataType> data_types) : tsblock_reader_(std::move(tsblock_reader)), column_names_(column_names), data_types_(data_types) { + std::vector<std::string> column_names, + std::vector<common::TSDataType> data_types) + : tsblock_reader_(std::move(tsblock_reader)), + column_names_(column_names), + data_types_(data_types) { init(); } ~TableResultSet(); - int next(bool &has_next) override; + int next(bool& has_next) override; bool is_null(const std::string& column_name) override; bool is_null(uint32_t column_index) override; RowRecord* get_row_record() override; std::shared_ptr<ResultSetMetadata> get_metadata() override; void close() override; + private: void init(); std::unique_ptr<TsBlockReader> tsblock_reader_; @@ -50,4 +54,4 @@ class TableResultSet : public ResultSet { std::vector<common::TSDataType> data_types_; }; } // namespace storage -#endif // TABLE_RESULT_SET_H \ No newline at end of file +#endif // TABLE_RESULT_SET_H \ No newline at end of file diff --git a/python/examples/example.py b/python/examples/example.py index 5a827060..cd0e61e5 100644 --- a/python/examples/example.py +++ b/python/examples/example.py @@ -17,98 +17,44 @@ import os -from tsfile import DeviceSchema, TimeseriesSchema, ColumnSchema, TableSchema, RowRecord, Field +from tsfile import ColumnSchema, TableSchema from tsfile import Tablet -from tsfile import TsFileWriter, TsFileReader, TSDataType, TSEncoding, Compressor, ColumnCategory +from tsfile import TsFileTableWriter, TsFileReader, TSDataType, TSEncoding, Compressor, ColumnCategory -## tsfile path. -reader_data_dir = os.path.join(os.path.dirname(__file__), "tree_model.tsfile") -if os.path.exists(reader_data_dir): - os.remove(reader_data_dir) - -## Tree Model Write Data - -DEVICE_NAME = "root.device" - -writer = TsFileWriter(reader_data_dir) - -timeseries = TimeseriesSchema("temp1", TSDataType.INT32, TSEncoding.PLAIN, Compressor.UNCOMPRESSED) -timeseries2 = TimeseriesSchema("temp2", TSDataType.INT64) -timeseries3 = TimeseriesSchema("level1", TSDataType.BOOLEAN) - -### register timeseries -writer.register_timeseries(DEVICE_NAME, timeseries) - -### register device -device = DeviceSchema(DEVICE_NAME, [timeseries2, timeseries3]) -writer.register_device(device) - -### Write data with row record -row_num = 10 -for i in range(row_num): - row_record = RowRecord(DEVICE_NAME, i + 1, - [Field("temp1",i, TSDataType.INT32), - Field("temp2", i, TSDataType.INT64)]) - writer.write_row_record(row_record) - -### Flush data and close writer. -writer.close() - -## Tree Model Read Data - -reader = TsFileReader(reader_data_dir) - -### Query device with specify time scope -result = reader.query_timeseries(DEVICE_NAME, ["temp1", "temp2"], 0, 100) - -### Get result list data types -sensor_info_list = result.get_result_column_info() -print(sensor_info_list) - -### Print data -while result.next(): - print(result.get_value_by_name("temp1")) - print(result.get_value_by_index(1)) -result.close() - -### Get query result which can free automatically - -with reader.query_timeseries(DEVICE_NAME, ["temp1"], 0, 100) as result: - while result.next(): - print(result.get_value_by_name("temp1")) - -reader.close() - -## Table Model Write and Read -table_data_dir = os.path.join(os.path.dirname(__file__), "table_model.tsfile") +## Write +table_data_dir = os.path.join(os.path.dirname(__file__), "table_data.tsfile") if os.path.exists(table_data_dir): os.remove(table_data_dir) column1 = ColumnSchema("id", TSDataType.STRING, ColumnCategory.TAG) column2 = ColumnSchema("id2", TSDataType.STRING, ColumnCategory.TAG) column3 = ColumnSchema("value", TSDataType.FLOAT, ColumnCategory.FIELD) +table_schema = TableSchema("test_table", columns=[column1, column2, column3]) + ### Free resource automatically -with TsFileWriter(table_data_dir) as writer: - writer.register_table(TableSchema("test_table", [column1, column2, column3])) +with TsFileTableWriter(table_data_dir, table_schema) as writer: tablet_row_num = 100 - tablet = Tablet("test_table", - ["id1", "id2", "value"], + tablet = Tablet( + ["id", "id2", "value"], [TSDataType.STRING, TSDataType.STRING, TSDataType.FLOAT], - [ColumnCategory.TAG, ColumnCategory.TAG, ColumnCategory.FIELD], tablet_row_num) for i in range(tablet_row_num): tablet.add_timestamp(i, i * 10) - tablet.add_value_by_name("id1", i, "test1") + tablet.add_value_by_name("id", i, "test1") tablet.add_value_by_name("id2", i, "test" + str(i)) tablet.add_value_by_index(2, i, i * 100.2) writer.write_table(tablet) -### Read table data from tsfile reader. -# with TsFileReader(table_data_dir) as reader: -# with reader.query_table("test_table", ["id2", "value"], 0, 50) as result: -# while result.next(): -# print(result.get_value_by_name("id2")) -# print(result.get_value_by_name("value")) +## Read + +### Free resource automatically +with TsFileReader(table_data_dir) as reader: + with reader.query_table("test_table", ["id2", "value"], 0, 50) as result: + while result.next(): + print(result.get_value_by_name("id2")) + print(result.get_value_by_name("value")) + print(result.read_data_frame()) + diff --git a/python/tsfile/tsfile_reader.pyx b/python/tsfile/tsfile_reader.pyx index e168c400..716f8f02 100644 --- a/python/tsfile/tsfile_reader.pyx +++ b/python/tsfile/tsfile_reader.pyx @@ -26,6 +26,7 @@ from .tsfile_cpp cimport * from .tsfile_py_cpp cimport * from libc.stdlib cimport free +from libc.stdint cimport INT64_MIN, INT64_MAX cimport cython from typing import List @@ -281,7 +282,7 @@ cdef class TsFileReaderPy: self.reader = tsfile_reader_new_c(pathname) def query_table(self, table_name : str, column_names : List[str], - start_time : int = 0, end_time : int = 0) -> ResultSetPy: + start_time : int = INT64_MIN, end_time : int = INT64_MAX) -> ResultSetPy: """ Execute a time range query on specified table and columns. :return: query result handler.
