This is an automated email from the ASF dual-hosted git repository.

colinlee pushed a commit to branch bench_mark
in repository https://gitbox.apache.org/repos/asf/tsfile.git

commit c9965ba8b1322a5b2349a314bb09a5e2071746cd
Author: colin <[email protected]>
AuthorDate: Sat Apr 5 20:49:06 2025 +0800

    tmp benchmark.
---
 cpp/bench_mark/bench_mark_src/CMakeLists.txt    |   2 +-
 cpp/bench_mark/bench_mark_src/bench_conf.h      |   9 +-
 cpp/bench_mark/bench_mark_src/bench_mark.cc     | 161 ------------------------
 cpp/bench_mark/bench_mark_src/bench_mark.h      |  57 ++++++---
 cpp/bench_mark/bench_mark_src/bench_mark_c.cpp  |   5 +
 cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc |  80 ++++++++++++
 6 files changed, 130 insertions(+), 184 deletions(-)

diff --git a/cpp/bench_mark/bench_mark_src/CMakeLists.txt b/cpp/bench_mark/bench_mark_src/CMakeLists.txt
index dbad71f9..b38e457c 100644
--- a/cpp/bench_mark/bench_mark_src/CMakeLists.txt
+++ b/cpp/bench_mark/bench_mark_src/CMakeLists.txt
@@ -49,7 +49,7 @@ message("SDK_LIB_DIR: ${SDK_LIB_DIR}")
 
 link_directories(${SDK_LIB_DIR})
 find_library(my_tsfile_lib NAMES tsfile PATHS ${SDK_LIB_DIR} NO_DEFAULT_PATH REQUIRED)
-add_executable(bench_mark_src bench_mark.cc)
+add_executable(bench_mark_src bench_mark_cpp.cc)
 target_link_libraries(bench_mark_src ${my_tsfile_lib})
 
 
diff --git a/cpp/bench_mark/bench_mark_src/bench_conf.h b/cpp/bench_mark/bench_mark_src/bench_conf.h
index 486d0b14..ef0b39e6 100644
--- a/cpp/bench_mark/bench_mark_src/bench_conf.h
+++ b/cpp/bench_mark/bench_mark_src/bench_conf.h
@@ -20,8 +20,9 @@
 #include <vector>
 
 namespace bench {
-int LOOP_NUM = 100000;
-int THREAD_NUM = 1;
-int TIMESERIES_NUM = 50;
-std::vector<int> TYPE_LIST = {0, 0, 1, 0, 1};
+int tablet_num{1000};       // printed by print_config as "Tablet num"
+int tablet_row_num{10000};  // printed by print_config as "Tablet row num"
+int tag_num{2};             // number of TAG columns gen_table_schema creates
+std::vector<float> diversity_rate{0.5f, 0.5f};  // presumably one rate per tag column - confirm
+std::vector<int> field_type_vector{1, 1, 1, 1, 1};  // FIELD column count per entry of bench::data_type
 }  // namespace bench
diff --git a/cpp/bench_mark/bench_mark_src/bench_mark.cc b/cpp/bench_mark/bench_mark_src/bench_mark.cc
deleted file mode 100644
index 09c9eb98..00000000
--- a/cpp/bench_mark/bench_mark_src/bench_mark.cc
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * License); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-#include <chrono>
-#include <cmath>
-#include <iostream>
-#include <numeric>
-#include <string>
-
-#include "bench_conf.h"
-#include "common/db_common.h"
-#include "common/global.h"
-#include "common/path.h"
-#include "writer/tsfile_writer.h"
-
-std::vector<int> register_timeseries(storage::TsFileWriter& writer,
-                                     int timeseries_num,
-                                     std::vector<int> type_list) {
-    auto start = std::chrono::high_resolution_clock::now();
-    int sum = std::accumulate(type_list.begin(), type_list.end(), 0);
-    std::vector<float> ratio_list;
-    for (int i = 0; i < type_list.size(); i++) {
-        ratio_list.push_back((float)type_list[i] / sum);
-    }
-    std::vector<int> type_num;
-    for (int i = 0; i < common::TSDataType::TEXT - 1; i++) {
-        type_num.push_back((int)std::ceil(timeseries_num * ratio_list[i]));
-    }
-    type_num.push_back(timeseries_num -
-                       std::accumulate(type_num.begin(), type_num.end(), 0));
-    writer.open("/tmp/tsfile_test.tsfile", O_CREAT | O_RDWR, 0644);
-    int ind = 0;
-    int ret = 0;
-    int type = 0;
-    for (auto num : type_num) {
-        for (int i = 0; i < num; i++) {
-            std::string device_name = "root.db001.dev" + std::to_string(ind);
-            std::string measurement_name = "m" + std::to_string(ind);
-            ret = writer.register_timeseries(
-                device_name, measurement_name, (common::TSDataType)type,
-                common::TSEncoding::PLAIN,
-                common::CompressionType::UNCOMPRESSED);
-            ind++;
-        }
-        std::cout << "register finished for TsDataType"
-                  << common::s_data_type_names[type]
-                  << " timeseries num: " << num << std::endl;
-        type++;
-    }
-    auto end = std::chrono::high_resolution_clock::now();
-    std::chrono::duration<double> elapsed = end - start;
-    std::cout << "register " << timeseries_num << "timeseries in file"
-              << "./test_data/tsfile_test.tsfile" << std::endl;
-    std::cout << "register timeseries cost time: " << elapsed.count() << "s"
-              << std::endl;
-    return type_num;
-}
-
-void test_writer_benchmark(storage::TsFileWriter& writer, int loop_num,
-                           std::vector<int> type_num) {
-    std::cout << "start writing data" << std::endl;
-    auto start = std::chrono::high_resolution_clock::now();
-    int type = 0;
-    for (int i = 0; i < loop_num; i++) {
-        int ind = 0;
-        for (auto num : type_num) {
-            for (int j = 0; j < num; j++) {
-                std::string device_name =
-                    "root.db001.dev" + std::to_string(ind);
-                std::string measurement_name = "m" + std::to_string(ind);
-                long long currentTimeStamp = i;
-                storage::TsRecord record(currentTimeStamp, device_name, 1);
-                switch (type) {
-                    case common::INT32: {
-                        storage::DataPoint point(measurement_name, 10000 + i);
-                        record.points_.push_back(point);
-                        break;
-                    }
-                    case common::INT64: {
-                        storage::DataPoint point(measurement_name,
-                                                 int64_t(10000 + i));
-                        record.points_.push_back(point);
-                        break;
-                    }
-                    case common::BOOLEAN: {
-                        storage::DataPoint point(measurement_name, i / 2 == 0);
-                        record.points_.push_back(point);
-                        break;
-                    }
-                    case common::FLOAT: {
-                        storage::DataPoint point(measurement_name, (float)i);
-                        record.points_.push_back(point);
-                        break;
-                    }
-                    case common::DOUBLE: {
-                        storage::DataPoint point(measurement_name, (double)i);
-                        record.points_.push_back(point);
-                        break;
-                    }
-                }
-                int ret = writer.write_record(record);
-                ASSERT(ret == 0);
-                ind++;
-            }
-            type++;
-        }
-    }
-
-    auto end = std::chrono::high_resolution_clock::now();
-    std::chrono::duration<double> elapsed = end - start;
-    int timeseries_num = std::accumulate(type_num.begin(), type_num.end(), 0);
-    std::cout << "writer loop: " << loop_num
-              << " timeseries num: " << timeseries_num << " records in file"
-              << "./test_data/tsfile_test.tsfile" << std::endl;
-    std::cout << "total num of points: " << loop_num * timeseries_num
-              << std::endl;
-    std::cout << "writer data cost time: " << elapsed.count() << "s"
-              << std::endl;
-    std::cout << "writer data speed:"
-              << loop_num * timeseries_num / elapsed.count() << " points/s"
-              << std::endl;
-    writer.flush();
-    writer.close();
-    auto end_flush = std::chrono::high_resolution_clock::now();
-    std::chrono::duration<double> elapsed_flush = end_flush - end;
-    std::cout << "flush data cost time: " << elapsed_flush.count() << "s"
-              << std::endl;
-}
-
-int main() {
-    std::cout << "LibTsFile benchmark" << std::endl;
-    std::cout << "LOOP_NUM:" << bench::LOOP_NUM << std::endl;
-    std::cout << "THREAD_NUM:" << bench::THREAD_NUM << std::endl;
-    std::cout << "TIMESERIES_NUM:" << bench::TIMESERIES_NUM << std::endl;
-    std::cout << "TYPE_LIST: " << bench::TYPE_LIST[0] << ":"
-              << bench::TYPE_LIST[1] << ":" << bench::TYPE_LIST[2] << ":"
-              << bench::TYPE_LIST[3] << ":" << bench::TYPE_LIST[4] << ":"
-              << bench::TYPE_LIST[5] << std::endl;
-    std::cout << "init tsfile config value" << std::endl;
-    common::init_config_value();
-    storage::TsFileWriter writer;
-    auto type_num =
-        register_timeseries(writer, bench::TIMESERIES_NUM, bench::TYPE_LIST);
-    test_writer_benchmark(writer, bench::LOOP_NUM, type_num);
-    return 0;
-}
diff --git a/cpp/bench_mark/bench_mark_src/bench_mark.h b/cpp/bench_mark/bench_mark_src/bench_mark.h
index e3bbd95d..fa31b8c0 100644
--- a/cpp/bench_mark/bench_mark_src/bench_mark.h
+++ b/cpp/bench_mark/bench_mark_src/bench_mark.h
@@ -1,18 +1,39 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * License); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
+//
+// Created by colin on 4/2/25.
+//
+
+#ifndef TSFILE_BENCH_MARK_BENCH_MARK_H
+#define TSFILE_BENCH_MARK_BENCH_MARK_H
+namespace bench {
+
+// Printable names of the benchmarked field types; entry i labels data_type[i].
+// The pointers are const so this header-defined table has internal linkage.
+const char* const data_types_name[5] = {"BOOLEAN", "INT32", "INT64", "FLOAT", "DOUBLE"};
+
+// Numeric codes of the benchmarked field types, parallel to data_types_name;
+// gen_table_schema casts each entry to TSDataType.
+const int data_type[5] = {0, 1, 2, 3, 4};
+
+// Print the benchmark settings (the bench:: globals of bench_conf.h) to stdout.
+inline void print_config() {
+    std::cout << "TsFile CPP benchmark" << std::endl;
+    std::cout << "Schema Configuration:" << std::endl;
+    std::cout << "Tag Column num: " << bench::tag_num << std::endl;
+    std::cout << "Diversity rate of tag :" << std::endl;
+    // ':'-separated rates; bounded by the vector itself so it is never overrun.
+    for (std::size_t i = 0; i < bench::diversity_rate.size(); i++) {
+        std::cout << (i == 0 ? "" : ":") << bench::diversity_rate[i];
+    }
+    std::cout << std::endl << "Field Column and types: " << std::endl;
+    for (std::size_t i = 0; i < 5 && i < bench::field_type_vector.size(); i++) {
+        std::cout << bench::data_types_name[i] << "x"
+                  << bench::field_type_vector[i] << "\t" << std::endl;
+    }
+    std::cout << "Tablet num:" << bench::tablet_num << std::endl;
+    std::cout << "Tablet row num:" << bench::tablet_row_num << std::endl;
+}
+
+}
+
+
+#endif  // TSFILE_BENCH_MARK_BENCH_MARK_H
diff --git a/cpp/bench_mark/bench_mark_src/bench_mark_c.cpp b/cpp/bench_mark/bench_mark_src/bench_mark_c.cpp
new file mode 100644
index 00000000..b52c9cff
--- /dev/null
+++ b/cpp/bench_mark/bench_mark_src/bench_mark_c.cpp
@@ -0,0 +1,5 @@
+//
+// Created by colin on 4/2/25.
+//
+
+#include "bench_mark_c.h"
diff --git a/cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc b/cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc
new file mode 100644
index 00000000..4063e199
--- /dev/null
+++ b/cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <chrono>
+#include <cmath>
+#include <iostream>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "bench_conf.h"
+#include "bench_mark.h"
+#include "common/db_common.h"
+#include "common/global.h"
+#include "common/path.h"
+#include "file/write_file.h"
+#include "utils/db_utils.h"
+#include "writer/tsfile_table_writer.h"
+
+using namespace storage;
+using namespace common;
+
+// Builds the benchmark schema: tag_num STRING tag columns "TAG<i>", followed by
+// field_type_vector[i] FIELD columns of type bench::data_type[i] per entry.
+// The schema is allocated with new; the caller receives the raw pointer.
+TableSchema* gen_table_schema(int tag_num,
+                              const std::vector<int>& field_type_vector) {
+    std::vector<common::ColumnSchema> schemas;
+    for (int i = 0; i < tag_num; i++) {
+        schemas.emplace_back("TAG" + std::to_string(i), common::TSDataType::STRING,
+                             common::ColumnCategory::TAG);
+    }
+    for (std::size_t i = 0; i < field_type_vector.size(); i++) {
+        // Start from std::string: "FIELD" + const char* would add two pointers.
+        const std::string prefix = std::string("FIELD") + bench::data_types_name[i];
+        for (int j = 0; j < field_type_vector[i]; j++) {
+            schemas.emplace_back(prefix + std::to_string(j),
+                                 static_cast<TSDataType>(bench::data_type[i]),
+                                 ColumnCategory::FIELD);
+        }
+    }
+    return new TableSchema("TestTable", schemas);
+}
+
+
+
+
+// Benchmark entry point: prints the configuration, creates the output file and
+// sets up a table writer for the generated schema; returns -1 if create fails.
+int main() {
+    bench::print_config();
+    common::init_config_value();
+    // benchmark for write
+    storage::WriteFile file;
+    int flags = O_WRONLY | O_CREAT | O_TRUNC;
+#ifdef _WIN32
+    flags |= O_BINARY;
+#endif
+    if (file.create("bench_mark_cpp.tsfile", flags, 0666) != common::E_OK) {
+        return -1;
+    }
+    // NOTE(review): (file, schema) ctor arguments and schema ownership assumed - confirm.
+    auto* schema = gen_table_schema(bench::tag_num, bench::field_type_vector);
+    TsFileTableWriter writer(&file, schema);
+}

Reply via email to