This is an automated email from the ASF dual-hosted git repository. colinlee pushed a commit to branch bench_mark in repository https://gitbox.apache.org/repos/asf/tsfile.git
commit 26abc91811d5e6552f4f110ce2345201f84b4432 Author: ColinLee <[email protected]> AuthorDate: Sun Apr 6 13:41:16 2025 +0800 add benchmark. --- cpp/CMakeLists.txt | 7 +- cpp/bench_mark/CMakeLists.txt | 26 ++- cpp/bench_mark/bench_mark_src/CMakeLists.txt | 57 ------ cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc | 80 -------- .../{bench_mark_src => src}/bench_conf.h | 15 +- .../{bench_mark_src => src}/bench_mark.h | 23 ++- .../bench_mark_c.cpp => src/bench_mark_c.cc} | 2 +- cpp/bench_mark/src/bench_mark_cpp.cc | 210 +++++++++++++++++++++ cpp/src/writer/time_chunk_writer.cc | 3 + 9 files changed, 266 insertions(+), 157 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 75684ce0..ff1789d7 100755 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -26,7 +26,6 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=maybe-uninitialized -D__STDC_FORMAT_MACROS") endif() -message("cmake using: USE_CPP11=${USE_CPP11}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") if(DEFINED ENV{CXX}) @@ -104,7 +103,13 @@ add_subdirectory(third_party) add_subdirectory(src) add_subdirectory(test) add_subdirectory(examples) +add_subdirectory(bench_mark) +set(TESTS_ENABLED ON) if(TESTS_ENABLED) add_dependencies(TsFile_Test tsfile) endif() +set(BENCH_MARK_ENABLED ON) +if(BENCH_MARK_ENABLED) + add_dependencies(bench_mark tsfile) +endif() diff --git a/cpp/bench_mark/CMakeLists.txt b/cpp/bench_mark/CMakeLists.txt index 6db63999..ddb1b333 100644 --- a/cpp/bench_mark/CMakeLists.txt +++ b/cpp/bench_mark/CMakeLists.txt @@ -17,17 +17,31 @@ specific language governing permissions and limitations under the License. ]] message("Running in bench_mark directory") +cmake_minimum_required(VERSION 3.1) +project(tsfile_bench_mark_project) + if(DEFINED ENV{CXX}) set(CMAKE_CXX_COMPILER $ENV{CXX}) endif() -set(CMAKE_CXX_FLAGS "$ENV{CXX_FLAGS} -Wall -Werror") +include_directories( + ${LIBRARY_INCLUDE_DIR} + ${THIRD_PARTY_INCLUDE} + ${CMAKE_SOURCE_DIR}/third_party/lz4 + ${CMAKE_SOURCE_DIR}/third_party/lzokay + ${CMAKE_SOURCE_DIR}/third_party/zlib-1.2.13 + ${CMAKE_SOURCE_DIR}/third_party/google_snappy + ${CMAKE_SOURCE_DIR}/third_party/antlr4-cpp-runtime-4/runtime/src +) -if (${USE_CPP11}) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") - set(CMAKE_CXX_STANDARD 11) +link_directories(${LIBRARY_OUTPUT_PATH}) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +if (CMAKE_BUILD_TYPE STREQUAL "Debug") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0") else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++03") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") endif() +message("CMAKE DEBUG: CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}") -add_subdirectory(bench_mark_src) \ No newline at end of file +add_executable(bench_mark src/bench_mark_cpp.cc src/bench_mark_c.cc) +target_link_libraries(bench_mark tsfile) diff --git a/cpp/bench_mark/bench_mark_src/CMakeLists.txt b/cpp/bench_mark/bench_mark_src/CMakeLists.txt deleted file mode 100644 index b38e457c..00000000 --- a/cpp/bench_mark/bench_mark_src/CMakeLists.txt +++ /dev/null @@ -1,57 +0,0 @@ -#[[ -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under the License. -]] -cmake_minimum_required(VERSION 3.1) -project(libtsfile_bench_mark_project) -message("Running in bench_mark/bench_mark_src directory") -if(DEFINED ENV{CXX}) - set(CMAKE_CXX_COMPILER $ENV{CXX}) -endif() - -set(SDK_BENCH_MARK_DIR ${PROJECT_SOURCE_DIR}/) -message("PROJECT DIR: ${SDK_BENCH_MARK_DIR}") -set(SDK_INCLUDE_DIR_DEBUG ${SKD_BENCHH_MARK_DIR}../../build/Debug/bin/libtsfile_sdk/include) -set(SDK_INCLUDE_DIR_RELEASE ${SKD_BENCHH_MARK_DIR}../../build/Release/bin/libtsfile_sdk/include) -set(SDK_LIB_DIR_DEBUG ${SKD_BENCHH_MARK_DIR}../../build/Debug/bin/libtsfile_sdk/lib) -set(SDK_LIB_DIR_RELEASE ${SKD_BENCHH_MARK_DIR}../../build/Release/bin/libtsfile_sdk/lib) - -if (USE_SDK_DEBUG) - SET(SKD_INCLUDE_DIR ${SDK_INCLUDE_DIR_DEBUG}) - SET(SDK_LIB_DIR ${SDK_LIB_DIR_DEBUG}) - SET(CMAKE_CXX_FLAGS "-g -O0") -else() - SET(SKD_INCLUDE_DIR ${SDK_INCLUDE_DIR_RELEASE}) - SET(SDK_LIB_DIR ${SDK_LIB_DIR_RELEASE}) - SET(CMAKE_CXX_FLAGS "-O3") -endif() - -include_directories(${SKD_INCLUDE_DIR}) -set(MAKE_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) -include_directories(${MAKE_INCLUDE}) -message("MAKE_INCLUDE: ${MAKE_INCLUDE}") -message("SDK_INCLUDE_DIR: ${SKD_INCLUDE_DIR}") -message("SDK_LIB_DIR: ${SDK_LIB_DIR}") - -link_directories(${SDK_LIB_DIR}) -find_library(my_tsfile_lib NAMES tsfile PATHS ${SDK_LIB_DIR} NO_DEFAULT_PATH REQUIRED) -add_executable(bench_mark_src bench_mark_cpp.cc) -target_link_libraries(bench_mark_src ${my_tsfile_lib}) - - - - diff --git a/cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc b/cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc deleted file mode 100644 index 4063e199..00000000 --- a/cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#include <chrono> -#include <cmath> -#include <iostream> -#include <numeric> -#include <string> -#include <vector> - -#include "bench_conf.h" -#include "bench_mark.h" -#include "common/db_common.h" -#include "common/global.h" -#include "common/path.h" -#include "file/write_file.h" -#include "utils/db_utils.h" -#include "writer/tsfile_table_writer.h" - -using namespace storage; -using namespace common; - -TableSchema* gen_table_schema(int tag_num, - const std::vector<int>& field_type_vector) { - std::vector<common::ColumnSchema> schemas; - for (int i = 0; i < tag_num; i++) { - schemas.emplace_back(std::string("TAG" + std::to_string(i)), - common::TSDataType::STRING, - common::ColumnCategory::TAG); - } - for (int i = 0; i < field_type_vector.size(); i++) { - int column_num = field_type_vector[i]; - for (int j = 0; j < column_num; j++) { - schemas.emplace_back( - std::string("FIELD" + bench::data_types_name[i] + - std::to_string(j)), - static_cast<TSDataType>(bench::data_type[i]), - ColumnCategory::FIELD); - } - } - - return new TableSchema("TestTable", schemas); -} - - - - -int main() { - int code = common::E_OK; - print_config(); - common::init_config_value(); - // benchmark for write - storage::WriteFile file = storage::WriteFile(); - int flags = O_WRONLY | O_CREAT | O_TRUNC; -#ifdef _WIN32 - flags |= O_BINARY; -#endif - mode_t mode = 0666; - code = file.create("bench_mark_cpp.tsfile", flags, mode); - if (code != common::E_OK) { - return -1; - } - - TsFileTableWriter writer = new TsFileTableWriter -} diff --git a/cpp/bench_mark/bench_mark_src/bench_conf.h b/cpp/bench_mark/src/bench_conf.h similarity index 72% rename from cpp/bench_mark/bench_mark_src/bench_conf.h rename to cpp/bench_mark/src/bench_conf.h index ef0b39e6..98ddc35e 100644 --- a/cpp/bench_mark/bench_mark_src/bench_conf.h +++ b/cpp/bench_mark/src/bench_conf.h @@ -17,12 +17,17 @@ * under the License. */ +#ifndef TSFILE_BENCH_MARK_BENCH_CONF_H +#define TSFILE_BENCH_MARK_BENCH_CONF_H + #include <vector> namespace bench { -int tablet_num = 1000; -int tablet_row_num = 10000; -int tag_num = 2; -std::vector<float> diversity_rate = {0.5, 0.5}; -std::vector<int> field_type_vector = {1, 1, 1, 1, 1}; +static int tablet_num = 1000; +static int tablet_row_num = 10000; +static int tag_num = 2; +static std::vector<float> diversity_rate = {0.5f, 0.5f}; +static std::vector<int> field_type_vector = {1, 1, 1, 1, 1}; } // namespace bench + +#endif // TSFILE_BENCH_MARK_BENCH_CONF_H diff --git a/cpp/bench_mark/bench_mark_src/bench_mark.h b/cpp/bench_mark/src/bench_mark.h similarity index 60% rename from cpp/bench_mark/bench_mark_src/bench_mark.h rename to cpp/bench_mark/src/bench_mark.h index fa31b8c0..6fcb64ea 100644 --- a/cpp/bench_mark/bench_mark_src/bench_mark.h +++ b/cpp/bench_mark/src/bench_mark.h @@ -4,33 +4,42 @@ #ifndef TSFILE_BENCH_MARK_BENCH_MARK_H #define TSFILE_BENCH_MARK_BENCH_MARK_H +#include <iostream> +#include "bench_conf.h" namespace bench { -const char* data_types_name[5] = { +static const char* data_types_name[5] = { "BOOLEAN", "INT32", "INT64", "FLOAT", "DOUBLE" -} +}; -const int data_type[5] = { +static const int data_type[5] = { 0, 1, 2, 3, 4 }; +static int column_num = 0; + inline void print_config () { std::cout << "TsFile CPP benchmark" << std::endl; std::cout << "Schema Configuration:" << std::endl; std::cout << "Tag Column num: " << bench::tag_num << std::endl; std::cout << "Diversity rate of tag :" << std::endl; - std::cout << bench::repetition_rate[0]; - for (int i = 0; i < bench::tag_num; i++) { - std::cout << ":"<< bench::repetition_rate[i]; + std::cout << bench::diversity_rate[0]; + for (int i = 1; i < bench::tag_num; i++) { + std::cout << ":"<< bench::diversity_rate[i]; } std::cout<<std::endl; std::cout << "Filed Column and types: " << std::endl; + column_num = 0; + column_num += diversity_rate.size(); for (int i = 0; i < 5; i++) { - std::cout << bench::data_types_name[i] << "x" << bench::field_type_vector[i] << "\t" << std::endl; + std::cout << bench::data_types_name[i] << "x" << bench::field_type_vector[i] << " "; + column_num += field_type_vector[i]; } + std::cout << std::endl; std::cout << "Tablet num:" << bench::tablet_num << std::endl; std::cout << "Tablet row num:" << bench::tablet_row_num << std::endl; + std::cout << "Total points is " << tablet_num * tablet_row_num * column_num<< std::endl; } } diff --git a/cpp/bench_mark/bench_mark_src/bench_mark_c.cpp b/cpp/bench_mark/src/bench_mark_c.cc similarity index 59% rename from cpp/bench_mark/bench_mark_src/bench_mark_c.cpp rename to cpp/bench_mark/src/bench_mark_c.cc index b52c9cff..b9988fb0 100644 --- a/cpp/bench_mark/bench_mark_src/bench_mark_c.cpp +++ b/cpp/bench_mark/src/bench_mark_c.cc @@ -2,4 +2,4 @@ // Created by colin on 4/2/25. // -#include "bench_mark_c.h" +#include "bench_mark.h" diff --git a/cpp/bench_mark/src/bench_mark_cpp.cc b/cpp/bench_mark/src/bench_mark_cpp.cc new file mode 100644 index 00000000..f02620e3 --- /dev/null +++ b/cpp/bench_mark/src/bench_mark_cpp.cc @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include <sys/types.h> + +#include <chrono> +#include <cmath> +#include <iostream> +#include <numeric> +#include <string> +#include <vector> + +#include "bench_conf.h" +#include "bench_mark.h" +#include "common/db_common.h" +#include "common/path.h" +#include "common/tablet.h" +#include "file/write_file.h" +#include "utils/db_utils.h" +#include "writer/tsfile_table_writer.h" + +using namespace storage; +using namespace common; + +std::vector<std::string> columns_name; +std::vector<TSDataType> data_types; + +void printProgressBar(int current, int total, int barWidth = 50) { + float progress = static_cast<float>(current) / total; + int pos = barWidth * progress; + + std::cout << "["; + for (int i = 0; i < barWidth; ++i) { + if (i < pos) + std::cout << "="; + else if (i == pos) + std::cout << ">"; + else + std::cout << " "; + } + std::cout << "] " << int(progress * 100.0) << " %\r"; + std::cout.flush(); +} + +TableSchema* gen_table_schema(int tag_num, + const std::vector<int>& field_type_vector) { + std::vector<common::ColumnSchema> schemas; + for (int i = 0; i < tag_num; i++) { + std::string column_name = std::string("TAG" + std::to_string(i)); + schemas.emplace_back(column_name, common::TSDataType::STRING, + common::ColumnCategory::TAG); + columns_name.push_back(column_name); + data_types.push_back(TSDataType::STRING); + } + for (int i = 0; i < field_type_vector.size(); i++) { + int column_num = field_type_vector[i]; + for (int j = 0; j < column_num; j++) { + std::string column_name = + std::string("FIELD" + std::to_string(i) + std::to_string(j)); + TSDataType data_type = static_cast<TSDataType>(bench::data_type[i]); + data_types.push_back(data_type); + columns_name.push_back(column_name); + schemas.emplace_back(column_name, data_type, ColumnCategory::FIELD); + } + } + return new TableSchema("TestTable", schemas); +} + +int main() { + int code = common::E_OK; + bench::print_config(); + common::init_config_value(); + // benchmark for write + storage::WriteFile file = storage::WriteFile(); + int flags = O_WRONLY | O_CREAT | O_TRUNC; +#ifdef _WIN32 + flags |= O_BINARY; +#endif + mode_t mode = 0666; + code = file.create("bench_mark_cpp.tsfile", flags, mode); + if (code != common::E_OK) { + return -1; + } + + TableSchema* table_schema = + gen_table_schema(bench::tag_num, bench::field_type_vector); + auto writer = new TsFileTableWriter(&file, table_schema); + delete (table_schema); + int64_t timestamp = 0; + auto start = std::chrono::high_resolution_clock::now(); + int64_t prepare_time = 0; + int64_t writing_time = 0; + int batch_num = bench::tablet_row_num * bench::diversity_rate[0]; + int batch_num2 = batch_num * bench::diversity_rate[1]; + for (int i = 0; i < bench::tablet_num; i++) { + printProgressBar(i, bench::tablet_num); + auto tablet_start = std::chrono::high_resolution_clock::now(); + auto* tablet = + new Tablet(columns_name, data_types, bench::tablet_row_num); + int tag1_num = 0; + int tag2_num = 0; + int tag1_row_num = 0; + int tag2_row_num = 0; + for (int j = 0; j < bench::tablet_row_num; j++) { + if (tag1_row_num > batch_num) { + tag1_row_num = 0; + tag1_num++; + tag2_row_num = 0; + tag2_num = 0; + } + tablet->add_timestamp(j, timestamp++); + tablet->add_value( + j, 0, std::string("tag1_" + std::to_string(tag1_num)).c_str()); + tag1_row_num++; + if (tag2_row_num > batch_num2) { + tag2_row_num = 0; + tag2_num++; + } + tablet->add_value( + j, 1, std::string("tag2_" + std::to_string(tag2_num)).c_str()); + tag2_row_num++; + for (int col = 2; col < data_types.size(); col++) { + switch (data_types[col]) { + case TSDataType::INT64: + tablet->add_value(j, col, + static_cast<int64_t>(timestamp)); + break; + case TSDataType::FLOAT: + tablet->add_value(j, col, + static_cast<float>(timestamp * 1.1)); + break; + case TSDataType::DOUBLE: + tablet->add_value(j, col, + static_cast<double>(timestamp * 1.1)); + break; + case TSDataType::INT32: + tablet->add_value(j, col, + static_cast<int32_t>(timestamp)); + break; + case TSDataType::BOOLEAN: + tablet->add_value(j, col, + static_cast<bool>(timestamp % 2)); + break; + default: + // + } + } + } + auto tablet_end = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast<std::chrono::microseconds>( + tablet_end - tablet_start); + prepare_time += duration.count(); + auto write_start = std::chrono::high_resolution_clock::now(); + writer->write_table(*tablet); + auto write_end = std::chrono::high_resolution_clock::now(); + writing_time += std::chrono::duration_cast<std::chrono::microseconds>( + write_end - write_start) + .count(); + delete tablet; + } + auto close_start = std::chrono::high_resolution_clock::now(); + writer->close(); + auto close_end = std::chrono::high_resolution_clock::now(); + writing_time += std::chrono::duration_cast<std::chrono::microseconds>( + close_end - close_start) + .count(); + delete writer; + auto end = std::chrono::high_resolution_clock::now(); + + FILE* file_to_size = fopen("bench_mark_cpp.tsfile", "rb"); + if (!file_to_size) { + std::cout << "unable to open file" << std::endl; + return -1; + } + fseeko(file_to_size, 0, SEEK_END); + off_t size = ftello(file_to_size); + fclose(file_to_size); + std::cout << "finish bench mark for cpp" << std::endl; + std::cout << "tsfile size is " << size << " bytes " << " ~ " << size / 1024 + << "KB" << std::endl; + float pre_time = prepare_time / 1000.0 / 1000.0; + float write_time = writing_time / 1000.0 / 1000.0; + std::cout << "prepare data time is " << pre_time << " s" << std::endl; + std::cout << "writing data time is " << write_time << " s" << std::endl; + std::cout << "writing speed is " + << static_cast<long long>(bench::tablet_num * bench::tablet_row_num * bench::column_num / + (pre_time + write_time)) + << " points/s" << std::endl; + std::cout << "total time is " + << std::chrono::duration_cast<std::chrono::microseconds>(end - + start) + .count() / + 1000.0 / 1000.0 + << " s" << std::endl; +} diff --git a/cpp/src/writer/time_chunk_writer.cc b/cpp/src/writer/time_chunk_writer.cc index 892c0d1c..35684f59 100644 --- a/cpp/src/writer/time_chunk_writer.cc +++ b/cpp/src/writer/time_chunk_writer.cc @@ -100,6 +100,9 @@ int TimeChunkWriter::seal_cur_page(bool end_chunk) { time_page_writer_.destroy_page_data(); time_page_writer_.reset(); } else { + if (first_page_statistic_ == nullptr) { + std::cout<<"error"<<std::endl; + } /* * if the chunk has only one page, do not writer page statistic. * so we save the data of first page and see if the chunk has more
