This is an automated email from the ASF dual-hosted git repository.

colinlee pushed a commit to branch bench_mark
in repository https://gitbox.apache.org/repos/asf/tsfile.git

commit 26abc91811d5e6552f4f110ce2345201f84b4432
Author: ColinLee <[email protected]>
AuthorDate: Sun Apr 6 13:41:16 2025 +0800

    add benchmark.
---
 cpp/CMakeLists.txt                                 |   7 +-
 cpp/bench_mark/CMakeLists.txt                      |  26 ++-
 cpp/bench_mark/bench_mark_src/CMakeLists.txt       |  57 ------
 cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc    |  80 --------
 .../{bench_mark_src => src}/bench_conf.h           |  15 +-
 .../{bench_mark_src => src}/bench_mark.h           |  23 ++-
 .../bench_mark_c.cpp => src/bench_mark_c.cc}       |   2 +-
 cpp/bench_mark/src/bench_mark_cpp.cc               | 210 +++++++++++++++++++++
 cpp/src/writer/time_chunk_writer.cc                |   3 +
 9 files changed, 266 insertions(+), 157 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 75684ce0..ff1789d7 100755
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -26,7 +26,6 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=maybe-uninitialized -D__STDC_FORMAT_MACROS")
 endif()
 
-message("cmake using: USE_CPP11=${USE_CPP11}")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
 
 if(DEFINED ENV{CXX})
@@ -104,7 +103,13 @@ add_subdirectory(third_party)
 add_subdirectory(src)
 add_subdirectory(test)
 add_subdirectory(examples)
+add_subdirectory(bench_mark)
+set(TESTS_ENABLED ON)
 if(TESTS_ENABLED)
     add_dependencies(TsFile_Test tsfile)
 endif()
+set(BENCH_MARK_ENABLED ON)
+if(BENCH_MARK_ENABLED)
+  add_dependencies(bench_mark tsfile)
+endif()
 
diff --git a/cpp/bench_mark/CMakeLists.txt b/cpp/bench_mark/CMakeLists.txt
index 6db63999..ddb1b333 100644
--- a/cpp/bench_mark/CMakeLists.txt
+++ b/cpp/bench_mark/CMakeLists.txt
@@ -17,17 +17,31 @@ specific language governing permissions and limitations
 under the License.
 ]]
 message("Running in bench_mark directory")
+cmake_minimum_required(VERSION 3.1)
+project(tsfile_bench_mark_project)
+
 if(DEFINED ENV{CXX})
     set(CMAKE_CXX_COMPILER $ENV{CXX})
 endif()
 
-set(CMAKE_CXX_FLAGS "$ENV{CXX_FLAGS} -Wall -Werror")
+include_directories(
+        ${LIBRARY_INCLUDE_DIR}
+        ${THIRD_PARTY_INCLUDE}
+        ${CMAKE_SOURCE_DIR}/third_party/lz4
+        ${CMAKE_SOURCE_DIR}/third_party/lzokay
+        ${CMAKE_SOURCE_DIR}/third_party/zlib-1.2.13
+        ${CMAKE_SOURCE_DIR}/third_party/google_snappy
+        ${CMAKE_SOURCE_DIR}/third_party/antlr4-cpp-runtime-4/runtime/src
+)
 
-if (${USE_CPP11})
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
-  set(CMAKE_CXX_STANDARD 11)
+link_directories(${LIBRARY_OUTPUT_PATH})
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+if (CMAKE_BUILD_TYPE STREQUAL "Debug")
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0")
 else()
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++03")
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
 endif()
+message("CMAKE DEBUG: CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}")
 
-add_subdirectory(bench_mark_src)
\ No newline at end of file
+add_executable(bench_mark src/bench_mark_cpp.cc src/bench_mark_c.cc)
+target_link_libraries(bench_mark tsfile)
diff --git a/cpp/bench_mark/bench_mark_src/CMakeLists.txt b/cpp/bench_mark/bench_mark_src/CMakeLists.txt
deleted file mode 100644
index b38e457c..00000000
--- a/cpp/bench_mark/bench_mark_src/CMakeLists.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-#[[
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-    https://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
-]]
-cmake_minimum_required(VERSION 3.1)
-project(libtsfile_bench_mark_project)
-message("Running in bench_mark/bench_mark_src directory")
-if(DEFINED ENV{CXX})
-    set(CMAKE_CXX_COMPILER $ENV{CXX})
-endif()
-
-set(SDK_BENCH_MARK_DIR ${PROJECT_SOURCE_DIR}/)
-message("PROJECT DIR: ${SDK_BENCH_MARK_DIR}")
-set(SDK_INCLUDE_DIR_DEBUG ${SKD_BENCHH_MARK_DIR}../../build/Debug/bin/libtsfile_sdk/include)
-set(SDK_INCLUDE_DIR_RELEASE ${SKD_BENCHH_MARK_DIR}../../build/Release/bin/libtsfile_sdk/include)
-set(SDK_LIB_DIR_DEBUG ${SKD_BENCHH_MARK_DIR}../../build/Debug/bin/libtsfile_sdk/lib)
-set(SDK_LIB_DIR_RELEASE ${SKD_BENCHH_MARK_DIR}../../build/Release/bin/libtsfile_sdk/lib)
-
-if (USE_SDK_DEBUG) 
-    SET(SKD_INCLUDE_DIR ${SDK_INCLUDE_DIR_DEBUG})
-    SET(SDK_LIB_DIR ${SDK_LIB_DIR_DEBUG})
-    SET(CMAKE_CXX_FLAGS "-g -O0")
-else()
-    SET(SKD_INCLUDE_DIR ${SDK_INCLUDE_DIR_RELEASE})
-    SET(SDK_LIB_DIR ${SDK_LIB_DIR_RELEASE})
-    SET(CMAKE_CXX_FLAGS "-O3")
-endif()
-
-include_directories(${SKD_INCLUDE_DIR})
-set(MAKE_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
-include_directories(${MAKE_INCLUDE})
-message("MAKE_INCLUDE: ${MAKE_INCLUDE}")
-message("SDK_INCLUDE_DIR: ${SKD_INCLUDE_DIR}")
-message("SDK_LIB_DIR: ${SDK_LIB_DIR}")
-
-link_directories(${SDK_LIB_DIR})
-find_library(my_tsfile_lib NAMES tsfile PATHS ${SDK_LIB_DIR} NO_DEFAULT_PATH REQUIRED)
-add_executable(bench_mark_src bench_mark_cpp.cc)
-target_link_libraries(bench_mark_src ${my_tsfile_lib})
-
-
-
-
diff --git a/cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc b/cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc
deleted file mode 100644
index 4063e199..00000000
--- a/cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * License); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-#include <chrono>
-#include <cmath>
-#include <iostream>
-#include <numeric>
-#include <string>
-#include <vector>
-
-#include "bench_conf.h"
-#include "bench_mark.h"
-#include "common/db_common.h"
-#include "common/global.h"
-#include "common/path.h"
-#include "file/write_file.h"
-#include "utils/db_utils.h"
-#include "writer/tsfile_table_writer.h"
-
-using namespace storage;
-using namespace common;
-
-TableSchema* gen_table_schema(int tag_num,
-                              const std::vector<int>& field_type_vector) {
-    std::vector<common::ColumnSchema> schemas;
-    for (int i = 0; i < tag_num; i++) {
-        schemas.emplace_back(std::string("TAG" + std::to_string(i)),
-                             common::TSDataType::STRING,
-                             common::ColumnCategory::TAG);
-    }
-    for (int i = 0; i < field_type_vector.size(); i++) {
-        int column_num = field_type_vector[i];
-        for (int j = 0; j < column_num; j++) {
-            schemas.emplace_back(
-                std::string("FIELD" + bench::data_types_name[i] +
-                            std::to_string(j)),
-                static_cast<TSDataType>(bench::data_type[i]),
-                ColumnCategory::FIELD);
-        }
-    }
-
-    return new TableSchema("TestTable", schemas);
-}
-
-
-
-
-int main() {
-    int code = common::E_OK;
-    print_config();
-    common::init_config_value();
-    // benchmark for write
-    storage::WriteFile file = storage::WriteFile();
-    int flags = O_WRONLY | O_CREAT | O_TRUNC;
-#ifdef _WIN32
-    flags |= O_BINARY;
-#endif
-    mode_t mode = 0666;
-    code = file.create("bench_mark_cpp.tsfile", flags, mode);
-    if (code != common::E_OK) {
-        return -1;
-    }
-
-    TsFileTableWriter writer = new TsFileTableWriter
-}
diff --git a/cpp/bench_mark/bench_mark_src/bench_conf.h b/cpp/bench_mark/src/bench_conf.h
similarity index 72%
rename from cpp/bench_mark/bench_mark_src/bench_conf.h
rename to cpp/bench_mark/src/bench_conf.h
index ef0b39e6..98ddc35e 100644
--- a/cpp/bench_mark/bench_mark_src/bench_conf.h
+++ b/cpp/bench_mark/src/bench_conf.h
@@ -17,12 +17,17 @@
  * under the License.
  */
 
+#ifndef TSFILE_BENCH_MARK_BENCH_CONF_H
+#define TSFILE_BENCH_MARK_BENCH_CONF_H
+
 #include <vector>
 
 namespace bench {
-int tablet_num = 1000;
-int tablet_row_num = 10000;
-int tag_num = 2;
-std::vector<float> diversity_rate = {0.5, 0.5};
-std::vector<int> field_type_vector = {1, 1, 1, 1, 1};
+static int tablet_num = 1000;
+static int tablet_row_num = 10000;
+static int tag_num = 2;
+static std::vector<float> diversity_rate = {0.5f, 0.5f};
+static std::vector<int> field_type_vector = {1, 1, 1, 1, 1};
 }  // namespace bench
+
+#endif  // TSFILE_BENCH_MARK_BENCH_CONF_H
diff --git a/cpp/bench_mark/bench_mark_src/bench_mark.h b/cpp/bench_mark/src/bench_mark.h
similarity index 60%
rename from cpp/bench_mark/bench_mark_src/bench_mark.h
rename to cpp/bench_mark/src/bench_mark.h
index fa31b8c0..6fcb64ea 100644
--- a/cpp/bench_mark/bench_mark_src/bench_mark.h
+++ b/cpp/bench_mark/src/bench_mark.h
@@ -4,33 +4,42 @@
 
 #ifndef TSFILE_BENCH_MARK_BENCH_MARK_H
 #define TSFILE_BENCH_MARK_BENCH_MARK_H
+#include <iostream>
+#include "bench_conf.h"
 namespace bench {
 
-const char* data_types_name[5] = {
+static const char* data_types_name[5] = {
     "BOOLEAN", "INT32", "INT64", "FLOAT", "DOUBLE"
-}
+};
 
-const int data_type[5] = {
+static const int data_type[5] = {
     0, 1, 2, 3, 4
 };
 
+static int column_num = 0;
+
 inline void print_config () {
     std::cout << "TsFile CPP benchmark" << std::endl;
     std::cout << "Schema Configuration:" << std::endl;
     std::cout << "Tag Column num: " << bench::tag_num << std::endl;
     std::cout << "Diversity rate of tag :" << std::endl;
-    std::cout << bench::repetition_rate[0];
-    for (int i = 0; i < bench::tag_num; i++) {
-        std::cout << ":"<< bench::repetition_rate[i];
+    std::cout << bench::diversity_rate[0];
+    for (int i = 1; i < bench::tag_num; i++) {
+        std::cout << ":"<< bench::diversity_rate[i];
     }
 
     std::cout<<std::endl;
     std::cout << "Filed Column and types: " << std::endl;
+    column_num = 0;
+    column_num += diversity_rate.size();
     for (int i = 0; i < 5; i++) {
-        std::cout << bench::data_types_name[i] << "x" << bench::field_type_vector[i] << "\t" << std::endl;
+        std::cout << bench::data_types_name[i] << "x" << bench::field_type_vector[i] << "  ";
+        column_num += field_type_vector[i];
     }
+    std::cout << std::endl;
     std::cout << "Tablet num:" << bench::tablet_num << std::endl;
     std::cout << "Tablet row num:" << bench::tablet_row_num << std::endl;
+    std::cout << "Total points is " << tablet_num * tablet_row_num * column_num<< std::endl;
 }
 
 }
diff --git a/cpp/bench_mark/bench_mark_src/bench_mark_c.cpp b/cpp/bench_mark/src/bench_mark_c.cc
similarity index 59%
rename from cpp/bench_mark/bench_mark_src/bench_mark_c.cpp
rename to cpp/bench_mark/src/bench_mark_c.cc
index b52c9cff..b9988fb0 100644
--- a/cpp/bench_mark/bench_mark_src/bench_mark_c.cpp
+++ b/cpp/bench_mark/src/bench_mark_c.cc
@@ -2,4 +2,4 @@
 // Created by colin on 4/2/25.
 //
 
-#include "bench_mark_c.h"
+#include "bench_mark.h"
diff --git a/cpp/bench_mark/src/bench_mark_cpp.cc b/cpp/bench_mark/src/bench_mark_cpp.cc
new file mode 100644
index 00000000..f02620e3
--- /dev/null
+++ b/cpp/bench_mark/src/bench_mark_cpp.cc
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <sys/types.h>
+
+#include <chrono>
+#include <cmath>
+#include <iostream>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "bench_conf.h"
+#include "bench_mark.h"
+#include "common/db_common.h"
+#include "common/path.h"
+#include "common/tablet.h"
+#include "file/write_file.h"
+#include "utils/db_utils.h"
+#include "writer/tsfile_table_writer.h"
+
+using namespace storage;
+using namespace common;
+
+std::vector<std::string> columns_name;
+std::vector<TSDataType> data_types;
+
+// Redraws a one-line text progress bar on stdout. The trailing '\r' moves the
+// cursor back to the line start, so the next call overwrites the bar in place.
+void printProgressBar(int current, int total, int barWidth = 50) {
+    // A non-positive total would make the ratio inf/NaN; draw an empty bar.
+    float progress = total > 0 ? static_cast<float>(current) / total : 0.0f;
+    int pos = barWidth * progress;
+
+    std::cout << "[";
+    for (int i = 0; i < barWidth; ++i) {
+        // '=' fills the cells before pos, '>' marks pos, blanks pad the rest.
+        std::cout << (i < pos ? "=" : (i == pos ? ">" : " "));
+    }
+    // The percentage is truncated toward zero, not rounded.
+    std::cout << "] " << int(progress * 100.0) << " %\r";
+    std::cout.flush();
+}
+
+// Builds the "TestTable" schema, mirroring every column into the file-level
+// columns_name / data_types vectors; field types stop at bench::data_type's end.
+TableSchema* gen_table_schema(int tag_num,
+                              const std::vector<int>& field_type_vector) {
+    std::vector<common::ColumnSchema> schemas;
+    for (int i = 0; i < tag_num; i++) {
+        std::string tag_name = "TAG" + std::to_string(i);
+        schemas.emplace_back(tag_name, TSDataType::STRING, ColumnCategory::TAG);
+        columns_name.push_back(tag_name);
+        data_types.push_back(TSDataType::STRING);
+    }
+    const size_t known = sizeof(bench::data_type) / sizeof(*bench::data_type);
+    for (size_t i = 0; i < field_type_vector.size() && i < known; i++) {
+        TSDataType field_type = static_cast<TSDataType>(bench::data_type[i]);
+        for (int j = 0; j < field_type_vector[i]; j++) {
+            std::string name = "FIELD" + std::to_string(i) + std::to_string(j);
+            data_types.push_back(field_type);
+            columns_name.push_back(name);
+            schemas.emplace_back(name, field_type, ColumnCategory::FIELD);
+        }
+    }
+    return new TableSchema("TestTable", schemas);  // the caller deletes it
+}
+
+// Benchmark driver: fills bench::tablet_num tablets, writes them to
+// bench_mark_cpp.tsfile, then prints the file size, timings and throughput.
+int main() {
+    typedef std::chrono::high_resolution_clock Clock;
+    auto elapsed_us = [](Clock::time_point from, Clock::time_point to) {
+        return std::chrono::duration_cast<std::chrono::microseconds>(to - from)
+            .count();
+    };
+    int code = common::E_OK;
+    bench::print_config();
+    common::init_config_value();
+    // benchmark for write
+    storage::WriteFile file = storage::WriteFile();
+    int flags = O_WRONLY | O_CREAT | O_TRUNC;
+#ifdef _WIN32
+    flags |= O_BINARY;
+#endif
+    mode_t mode = 0666;
+    code = file.create("bench_mark_cpp.tsfile", flags, mode);
+    if (code != common::E_OK) {
+        return -1;
+    }
+
+    TableSchema* table_schema =
+        gen_table_schema(bench::tag_num, bench::field_type_vector);
+    auto writer = new TsFileTableWriter(&file, table_schema);
+    delete (table_schema);
+    int64_t timestamp = 0;
+    auto start = Clock::now();
+    int64_t prepare_time = 0;  // microseconds filling tablets
+    int64_t writing_time = 0;  // microseconds in write_table()/close()
+    // Row counts after which the tag-1 / tag-2 value counters are advanced.
+    int batch_num = bench::tablet_row_num * bench::diversity_rate[0];
+    int batch_num2 = batch_num * bench::diversity_rate[1];
+    for (int i = 0; i < bench::tablet_num; i++) {
+        printProgressBar(i, bench::tablet_num);
+        auto tablet_start = Clock::now();
+        auto* tablet =
+            new Tablet(columns_name, data_types, bench::tablet_row_num);
+        int tag1_num = 0;
+        int tag2_num = 0;
+        int tag1_row_num = 0;
+        int tag2_row_num = 0;
+        for (int j = 0; j < bench::tablet_row_num; j++) {
+            if (tag1_row_num > batch_num) {
+                tag1_row_num = 0;
+                tag1_num++;
+                tag2_row_num = 0;
+                tag2_num = 0;
+            }
+            tablet->add_timestamp(j, timestamp++);
+            tablet->add_value(
+                j, 0, std::string("tag1_" + std::to_string(tag1_num)).c_str());
+            tag1_row_num++;
+            if (tag2_row_num > batch_num2) {
+                tag2_row_num = 0;
+                tag2_num++;
+            }
+            tablet->add_value(
+                j, 1, std::string("tag2_" + std::to_string(tag2_num)).c_str());
+            tag2_row_num++;
+            for (int col = 2; col < static_cast<int>(data_types.size()); col++) {
+                switch (data_types[col]) {
+                    case TSDataType::INT64:
+                        tablet->add_value(j, col,
+                                          static_cast<int64_t>(timestamp));
+                        break;
+                    case TSDataType::FLOAT:
+                        tablet->add_value(j, col,
+                                          static_cast<float>(timestamp * 1.1));
+                        break;
+                    case TSDataType::DOUBLE:
+                        tablet->add_value(j, col,
+                                          static_cast<double>(timestamp * 1.1));
+                        break;
+                    case TSDataType::INT32:
+                        tablet->add_value(j, col,
+                                          static_cast<int32_t>(timestamp));
+                        break;
+                    case TSDataType::BOOLEAN:
+                        tablet->add_value(j, col,
+                                          static_cast<bool>(timestamp % 2));
+                        break;
+                    default:
+                        break;  // other column types get no value
+                }
+            }
+        }
+        auto tablet_end = Clock::now();
+        prepare_time += elapsed_us(tablet_start, tablet_end);
+        auto write_start = Clock::now();
+        writer->write_table(*tablet);
+        auto write_end = Clock::now();
+        writing_time += elapsed_us(write_start, write_end);
+        delete tablet;
+    }
+    auto close_start = Clock::now();
+    writer->close();
+    auto close_end = Clock::now();
+    writing_time += elapsed_us(close_start, close_end);
+    delete writer;
+    auto end = Clock::now();
+
+    FILE* file_to_size = fopen("bench_mark_cpp.tsfile", "rb");
+    if (!file_to_size) {
+        std::cout << "unable to open file" << std::endl;
+        return -1;
+    }
+    fseeko(file_to_size, 0, SEEK_END);
+    off_t size = ftello(file_to_size);
+    fclose(file_to_size);
+    std::cout << "finish bench mark for cpp" << std::endl;
+    std::cout << "tsfile size is " << size << " bytes " << " ~ " << size / 1024
+              << "KB" << std::endl;
+    float pre_time = prepare_time / 1000.0 / 1000.0;
+    float write_time = writing_time / 1000.0 / 1000.0;
+    std::cout << "prepare data time is " << pre_time << " s" << std::endl;
+    std::cout << "writing data time is " << write_time << " s" << std::endl;
+    // Widen before multiplying so large configurations cannot overflow int.
+    int64_t total_points = static_cast<int64_t>(bench::tablet_num) *
+                           bench::tablet_row_num * bench::column_num;
+    std::cout << "writing speed is "
+              << static_cast<long long>(total_points / (pre_time + write_time))
+              << " points/s" << std::endl;
+    std::cout << "total time is " << elapsed_us(start, end) / 1000.0 / 1000.0
+              << " s" << std::endl;
+}
diff --git a/cpp/src/writer/time_chunk_writer.cc b/cpp/src/writer/time_chunk_writer.cc
index 892c0d1c..35684f59 100644
--- a/cpp/src/writer/time_chunk_writer.cc
+++ b/cpp/src/writer/time_chunk_writer.cc
@@ -100,6 +100,9 @@ int TimeChunkWriter::seal_cur_page(bool end_chunk) {
             time_page_writer_.destroy_page_data();
             time_page_writer_.reset();
         } else {
+            if (first_page_statistic_ == nullptr) {
+                std::cout<<"error"<<std::endl;
+            }
             /*
              * if the chunk has only one page, do not writer page statistic.
              * so we save the data of first page and see if the chunk has more

Reply via email to