This is an automated email from the ASF dual-hosted git repository. colinlee pushed a commit to branch bench_mark in repository https://gitbox.apache.org/repos/asf/tsfile.git
commit c9965ba8b1322a5b2349a314bb09a5e2071746cd Author: colin <[email protected]> AuthorDate: Sat Apr 5 20:49:06 2025 +0800 tmp benchmark. --- cpp/bench_mark/bench_mark_src/CMakeLists.txt | 2 +- cpp/bench_mark/bench_mark_src/bench_conf.h | 9 +- cpp/bench_mark/bench_mark_src/bench_mark.cc | 161 ------------------------ cpp/bench_mark/bench_mark_src/bench_mark.h | 57 ++++++--- cpp/bench_mark/bench_mark_src/bench_mark_c.cpp | 5 + cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc | 80 ++++++++++++ 6 files changed, 130 insertions(+), 184 deletions(-) diff --git a/cpp/bench_mark/bench_mark_src/CMakeLists.txt b/cpp/bench_mark/bench_mark_src/CMakeLists.txt index dbad71f9..b38e457c 100644 --- a/cpp/bench_mark/bench_mark_src/CMakeLists.txt +++ b/cpp/bench_mark/bench_mark_src/CMakeLists.txt @@ -49,7 +49,7 @@ message("SDK_LIB_DIR: ${SDK_LIB_DIR}") link_directories(${SDK_LIB_DIR}) find_library(my_tsfile_lib NAMES tsfile PATHS ${SDK_LIB_DIR} NO_DEFAULT_PATH REQUIRED) -add_executable(bench_mark_src bench_mark.cc) +add_executable(bench_mark_src bench_mark_cpp.cc) target_link_libraries(bench_mark_src ${my_tsfile_lib}) diff --git a/cpp/bench_mark/bench_mark_src/bench_conf.h b/cpp/bench_mark/bench_mark_src/bench_conf.h index 486d0b14..ef0b39e6 100644 --- a/cpp/bench_mark/bench_mark_src/bench_conf.h +++ b/cpp/bench_mark/bench_mark_src/bench_conf.h @@ -20,8 +20,9 @@ #include <vector> namespace bench { -int LOOP_NUM = 100000; -int THREAD_NUM = 1; -int TIMESERIES_NUM = 50; -std::vector<int> TYPE_LIST = {0, 0, 1, 0, 1}; +int tablet_num = 1000; +int tablet_row_num = 10000; +int tag_num = 2; +std::vector<float> diversity_rate = {0.5, 0.5}; +std::vector<int> field_type_vector = {1, 1, 1, 1, 1}; } // namespace bench diff --git a/cpp/bench_mark/bench_mark_src/bench_mark.cc b/cpp/bench_mark/bench_mark_src/bench_mark.cc deleted file mode 100644 index 09c9eb98..00000000 --- a/cpp/bench_mark/bench_mark_src/bench_mark.cc +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#include <chrono> -#include <cmath> -#include <iostream> -#include <numeric> -#include <string> - -#include "bench_conf.h" -#include "common/db_common.h" -#include "common/global.h" -#include "common/path.h" -#include "writer/tsfile_writer.h" - -std::vector<int> register_timeseries(storage::TsFileWriter& writer, - int timeseries_num, - std::vector<int> type_list) { - auto start = std::chrono::high_resolution_clock::now(); - int sum = std::accumulate(type_list.begin(), type_list.end(), 0); - std::vector<float> ratio_list; - for (int i = 0; i < type_list.size(); i++) { - ratio_list.push_back((float)type_list[i] / sum); - } - std::vector<int> type_num; - for (int i = 0; i < common::TSDataType::TEXT - 1; i++) { - type_num.push_back((int)std::ceil(timeseries_num * ratio_list[i])); - } - type_num.push_back(timeseries_num - - std::accumulate(type_num.begin(), type_num.end(), 0)); - writer.open("/tmp/tsfile_test.tsfile", O_CREAT | O_RDWR, 0644); - int ind = 0; - int ret = 0; - int type = 0; - for (auto num : type_num) { - for (int i = 0; i < num; i++) { - std::string device_name = "root.db001.dev" + std::to_string(ind); - std::string measurement_name = "m" + std::to_string(ind); - ret = writer.register_timeseries( - device_name, measurement_name, (common::TSDataType)type, - common::TSEncoding::PLAIN, - common::CompressionType::UNCOMPRESSED); - ind++; - } - std::cout << "register finished for TsDataType" - << common::s_data_type_names[type] - << " timeseries num: " << num << std::endl; - type++; - } - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration<double> elapsed = end - start; - std::cout << "register " << timeseries_num << "timeseries in file" - << "./test_data/tsfile_test.tsfile" << std::endl; - std::cout << "register timeseries cost time: " << elapsed.count() << "s" - << std::endl; - return type_num; -} - -void test_writer_benchmark(storage::TsFileWriter& writer, int loop_num, - std::vector<int> type_num) { - std::cout << "start writing data" << std::endl; - auto start = std::chrono::high_resolution_clock::now(); - int type = 0; - for (int i = 0; i < loop_num; i++) { - int ind = 0; - for (auto num : type_num) { - for (int j = 0; j < num; j++) { - std::string device_name = - "root.db001.dev" + std::to_string(ind); - std::string measurement_name = "m" + std::to_string(ind); - long long currentTimeStamp = i; - storage::TsRecord record(currentTimeStamp, device_name, 1); - switch (type) { - case common::INT32: { - storage::DataPoint point(measurement_name, 10000 + i); - record.points_.push_back(point); - break; - } - case common::INT64: { - storage::DataPoint point(measurement_name, - int64_t(10000 + i)); - record.points_.push_back(point); - break; - } - case common::BOOLEAN: { - storage::DataPoint point(measurement_name, i / 2 == 0); - record.points_.push_back(point); - break; - } - case common::FLOAT: { - storage::DataPoint point(measurement_name, (float)i); - record.points_.push_back(point); - break; - } - case common::DOUBLE: { - storage::DataPoint point(measurement_name, (double)i); - record.points_.push_back(point); - break; - } - } - int ret = writer.write_record(record); - ASSERT(ret == 0); - ind++; - } - type++; - } - } - - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration<double> elapsed = end - start; - int timeseries_num = std::accumulate(type_num.begin(), type_num.end(), 0); - std::cout << "writer loop: " << loop_num - << " timeseries num: " << timeseries_num << " records in file" - << "./test_data/tsfile_test.tsfile" << std::endl; - std::cout << "total num of points: " << loop_num * timeseries_num - << std::endl; - std::cout << "writer data cost time: " << elapsed.count() << "s" - << std::endl; - std::cout << "writer data speed:" - << loop_num * timeseries_num / elapsed.count() << " points/s" - << std::endl; - writer.flush(); - writer.close(); - auto end_flush = std::chrono::high_resolution_clock::now(); - std::chrono::duration<double> elapsed_flush = end_flush - end; - std::cout << "flush data cost time: " << elapsed_flush.count() << "s" - << std::endl; -} - -int main() { - std::cout << "LibTsFile benchmark" << std::endl; - std::cout << "LOOP_NUM:" << bench::LOOP_NUM << std::endl; - std::cout << "THREAD_NUM:" << bench::THREAD_NUM << std::endl; - std::cout << "TIMESERIES_NUM:" << bench::TIMESERIES_NUM << std::endl; - std::cout << "TYPE_LIST: " << bench::TYPE_LIST[0] << ":" - << bench::TYPE_LIST[1] << ":" << bench::TYPE_LIST[2] << ":" - << bench::TYPE_LIST[3] << ":" << bench::TYPE_LIST[4] << ":" - << bench::TYPE_LIST[5] << std::endl; - std::cout << "init tsfile config value" << std::endl; - common::init_config_value(); - storage::TsFileWriter writer; - auto type_num = - register_timeseries(writer, bench::TIMESERIES_NUM, bench::TYPE_LIST); - test_writer_benchmark(writer, bench::LOOP_NUM, type_num); - return 0; -} diff --git a/cpp/bench_mark/bench_mark_src/bench_mark.h b/cpp/bench_mark/bench_mark_src/bench_mark.h index e3bbd95d..fa31b8c0 100644 --- a/cpp/bench_mark/bench_mark_src/bench_mark.h +++ b/cpp/bench_mark/bench_mark_src/bench_mark.h @@ -1,18 +1,39 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ +// +// Created by colin on 4/2/25. +// + +#ifndef TSFILE_BENCH_MARK_BENCH_MARK_H +#define TSFILE_BENCH_MARK_BENCH_MARK_H +namespace bench { + +const char* data_types_name[5] = { + "BOOLEAN", "INT32", "INT64", "FLOAT", "DOUBLE" +} + +const int data_type[5] = { + 0, 1, 2, 3, 4 +}; + +inline void print_config () { + std::cout << "TsFile CPP benchmark" << std::endl; + std::cout << "Schema Configuration:" << std::endl; + std::cout << "Tag Column num: " << bench::tag_num << std::endl; + std::cout << "Diversity rate of tag :" << std::endl; + std::cout << bench::repetition_rate[0]; + for (int i = 0; i < bench::tag_num; i++) { + std::cout << ":"<< bench::repetition_rate[i]; + } + + std::cout<<std::endl; + std::cout << "Filed Column and types: " << std::endl; + for (int i = 0; i < 5; i++) { + std::cout << bench::data_types_name[i] << "x" << bench::field_type_vector[i] << "\t" << std::endl; + } + std::cout << "Tablet num:" << bench::tablet_num << std::endl; + std::cout << "Tablet row num:" << bench::tablet_row_num << std::endl; +} + +} + + +#endif // TSFILE_BENCH_MARK_BENCH_MARK_H diff --git a/cpp/bench_mark/bench_mark_src/bench_mark_c.cpp b/cpp/bench_mark/bench_mark_src/bench_mark_c.cpp new file mode 100644 index 00000000..b52c9cff --- /dev/null +++ b/cpp/bench_mark/bench_mark_src/bench_mark_c.cpp @@ -0,0 +1,5 @@ +// +// Created by colin on 4/2/25. +// + +#include "bench_mark_c.h" diff --git a/cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc b/cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc new file mode 100644 index 00000000..4063e199 --- /dev/null +++ b/cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include <chrono> +#include <cmath> +#include <iostream> +#include <numeric> +#include <string> +#include <vector> + +#include "bench_conf.h" +#include "bench_mark.h" +#include "common/db_common.h" +#include "common/global.h" +#include "common/path.h" +#include "file/write_file.h" +#include "utils/db_utils.h" +#include "writer/tsfile_table_writer.h" + +using namespace storage; +using namespace common; + +TableSchema* gen_table_schema(int tag_num, + const std::vector<int>& field_type_vector) { + std::vector<common::ColumnSchema> schemas; + for (int i = 0; i < tag_num; i++) { + schemas.emplace_back(std::string("TAG" + std::to_string(i)), + common::TSDataType::STRING, + common::ColumnCategory::TAG); + } + for (int i = 0; i < field_type_vector.size(); i++) { + int column_num = field_type_vector[i]; + for (int j = 0; j < column_num; j++) { + schemas.emplace_back( + std::string("FIELD" + bench::data_types_name[i] + + std::to_string(j)), + static_cast<TSDataType>(bench::data_type[i]), + ColumnCategory::FIELD); + } + } + + return new TableSchema("TestTable", schemas); +} + + + + +int main() { + int code = common::E_OK; + print_config(); + common::init_config_value(); + // benchmark for write + storage::WriteFile file = storage::WriteFile(); + int flags = O_WRONLY | O_CREAT | O_TRUNC; +#ifdef _WIN32 + flags |= O_BINARY; +#endif + mode_t mode = 0666; + code = file.create("bench_mark_cpp.tsfile", flags, mode); + if (code != common::E_OK) { + return -1; + } + + TsFileTableWriter writer = new TsFileTableWriter +}
