This is an automated email from the ASF dual-hosted git repository.
colinlee pushed a commit to branch bench_mark
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/bench_mark by this push:
new 968c4285 add benchmark.
968c4285 is described below
commit 968c428550ab558624079c6a5f09437305467636
Author: ColinLee <[email protected]>
AuthorDate: Sun Apr 6 13:41:16 2025 +0800
add benchmark.
---
cpp/CMakeLists.txt | 7 +-
cpp/bench_mark/CMakeLists.txt | 26 ++-
cpp/bench_mark/bench_mark_src/CMakeLists.txt | 57 ------
cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc | 80 --------
.../{bench_mark_src => src}/bench_conf.h | 15 +-
.../{bench_mark_src => src}/bench_mark.h | 23 ++-
.../bench_mark_c.cpp => src/bench_mark_c.cc} | 2 +-
cpp/bench_mark/src/bench_mark_cpp.cc | 210 +++++++++++++++++++++
cpp/src/writer/time_chunk_writer.cc | 5 +
cpp/src/writer/value_chunk_writer.cc | 2 +
10 files changed, 270 insertions(+), 157 deletions(-)
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 75684ce0..ff1789d7 100755
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -26,7 +26,6 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=maybe-uninitialized
-D__STDC_FORMAT_MACROS")
endif()
-message("cmake using: USE_CPP11=${USE_CPP11}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
if(DEFINED ENV{CXX})
@@ -104,7 +103,13 @@ add_subdirectory(third_party)
add_subdirectory(src)
add_subdirectory(test)
add_subdirectory(examples)
+add_subdirectory(bench_mark) # Always configures the bench_mark directory; this call is not guarded by BENCH_MARK_ENABLED.
+set(TESTS_ENABLED ON) # NOTE(review): assigned ON immediately before the check, so the if() below is always taken here.
if(TESTS_ENABLED)
add_dependencies(TsFile_Test tsfile)
endif()
+set(BENCH_MARK_ENABLED ON) # NOTE(review): same pattern as TESTS_ENABLED above; the switch cannot be turned off from outside.
+if(BENCH_MARK_ENABLED)
+ add_dependencies(bench_mark tsfile) # Make the bench_mark target depend on the tsfile target.
+endif()
diff --git a/cpp/bench_mark/CMakeLists.txt b/cpp/bench_mark/CMakeLists.txt
index 6db63999..ddb1b333 100644
--- a/cpp/bench_mark/CMakeLists.txt
+++ b/cpp/bench_mark/CMakeLists.txt
@@ -17,17 +17,31 @@ specific language governing permissions and limitations
under the License.
]]
message("Running in bench_mark directory")
+cmake_minimum_required(VERSION 3.1)
+project(tsfile_bench_mark_project)
+
if(DEFINED ENV{CXX})
set(CMAKE_CXX_COMPILER $ENV{CXX})
endif()
-set(CMAKE_CXX_FLAGS "$ENV{CXX_FLAGS} -Wall -Werror")
+include_directories( # LIBRARY_INCLUDE_DIR and THIRD_PARTY_INCLUDE are not set in this file; presumably defined by the parent CMakeLists - confirm.
+ ${LIBRARY_INCLUDE_DIR}
+ ${THIRD_PARTY_INCLUDE}
+ ${CMAKE_SOURCE_DIR}/third_party/lz4
+ ${CMAKE_SOURCE_DIR}/third_party/lzokay
+ ${CMAKE_SOURCE_DIR}/third_party/zlib-1.2.13
+ ${CMAKE_SOURCE_DIR}/third_party/google_snappy
+ ${CMAKE_SOURCE_DIR}/third_party/antlr4-cpp-runtime-4/runtime/src
+)
-if (${USE_CPP11})
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
- set(CMAKE_CXX_STANDARD 11)
+link_directories(${LIBRARY_OUTPUT_PATH})
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") # Appends to the inherited flags. NOTE(review): cpp/CMakeLists.txt already appends -std=c++11, so the option is repeated.
+if (CMAKE_BUILD_TYPE STREQUAL "Debug")
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0") # Debug build: debug info, no optimisation.
else()
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++03")
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") # Any build type other than "Debug" (including an empty one) gets -O3.
endif()
+message("CMAKE DEBUG: CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}")
-add_subdirectory(bench_mark_src)
\ No newline at end of file
+add_executable(bench_mark src/bench_mark_cpp.cc src/bench_mark_c.cc) # One executable built from both benchmark sources.
+target_link_libraries(bench_mark tsfile) # Link against the in-tree tsfile target.
diff --git a/cpp/bench_mark/bench_mark_src/CMakeLists.txt
b/cpp/bench_mark/bench_mark_src/CMakeLists.txt
deleted file mode 100644
index b38e457c..00000000
--- a/cpp/bench_mark/bench_mark_src/CMakeLists.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-#[[
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- https://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied. See the License for the
-specific language governing permissions and limitations
-under the License.
-]]
-cmake_minimum_required(VERSION 3.1)
-project(libtsfile_bench_mark_project)
-message("Running in bench_mark/bench_mark_src directory")
-if(DEFINED ENV{CXX})
- set(CMAKE_CXX_COMPILER $ENV{CXX})
-endif()
-
-set(SDK_BENCH_MARK_DIR ${PROJECT_SOURCE_DIR}/)
-message("PROJECT DIR: ${SDK_BENCH_MARK_DIR}")
-set(SDK_INCLUDE_DIR_DEBUG
${SKD_BENCHH_MARK_DIR}../../build/Debug/bin/libtsfile_sdk/include)
-set(SDK_INCLUDE_DIR_RELEASE
${SKD_BENCHH_MARK_DIR}../../build/Release/bin/libtsfile_sdk/include)
-set(SDK_LIB_DIR_DEBUG
${SKD_BENCHH_MARK_DIR}../../build/Debug/bin/libtsfile_sdk/lib)
-set(SDK_LIB_DIR_RELEASE
${SKD_BENCHH_MARK_DIR}../../build/Release/bin/libtsfile_sdk/lib)
-
-if (USE_SDK_DEBUG)
- SET(SKD_INCLUDE_DIR ${SDK_INCLUDE_DIR_DEBUG})
- SET(SDK_LIB_DIR ${SDK_LIB_DIR_DEBUG})
- SET(CMAKE_CXX_FLAGS "-g -O0")
-else()
- SET(SKD_INCLUDE_DIR ${SDK_INCLUDE_DIR_RELEASE})
- SET(SDK_LIB_DIR ${SDK_LIB_DIR_RELEASE})
- SET(CMAKE_CXX_FLAGS "-O3")
-endif()
-
-include_directories(${SKD_INCLUDE_DIR})
-set(MAKE_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
-include_directories(${MAKE_INCLUDE})
-message("MAKE_INCLUDE: ${MAKE_INCLUDE}")
-message("SDK_INCLUDE_DIR: ${SKD_INCLUDE_DIR}")
-message("SDK_LIB_DIR: ${SDK_LIB_DIR}")
-
-link_directories(${SDK_LIB_DIR})
-find_library(my_tsfile_lib NAMES tsfile PATHS ${SDK_LIB_DIR} NO_DEFAULT_PATH
REQUIRED)
-add_executable(bench_mark_src bench_mark_cpp.cc)
-target_link_libraries(bench_mark_src ${my_tsfile_lib})
-
-
-
-
diff --git a/cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc
b/cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc
deleted file mode 100644
index 4063e199..00000000
--- a/cpp/bench_mark/bench_mark_src/bench_mark_cpp.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * License); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-#include <chrono>
-#include <cmath>
-#include <iostream>
-#include <numeric>
-#include <string>
-#include <vector>
-
-#include "bench_conf.h"
-#include "bench_mark.h"
-#include "common/db_common.h"
-#include "common/global.h"
-#include "common/path.h"
-#include "file/write_file.h"
-#include "utils/db_utils.h"
-#include "writer/tsfile_table_writer.h"
-
-using namespace storage;
-using namespace common;
-
-TableSchema* gen_table_schema(int tag_num,
- const std::vector<int>& field_type_vector) {
- std::vector<common::ColumnSchema> schemas;
- for (int i = 0; i < tag_num; i++) {
- schemas.emplace_back(std::string("TAG" + std::to_string(i)),
- common::TSDataType::STRING,
- common::ColumnCategory::TAG);
- }
- for (int i = 0; i < field_type_vector.size(); i++) {
- int column_num = field_type_vector[i];
- for (int j = 0; j < column_num; j++) {
- schemas.emplace_back(
- std::string("FIELD" + bench::data_types_name[i] +
- std::to_string(j)),
- static_cast<TSDataType>(bench::data_type[i]),
- ColumnCategory::FIELD);
- }
- }
-
- return new TableSchema("TestTable", schemas);
-}
-
-
-
-
-int main() {
- int code = common::E_OK;
- print_config();
- common::init_config_value();
- // benchmark for write
- storage::WriteFile file = storage::WriteFile();
- int flags = O_WRONLY | O_CREAT | O_TRUNC;
-#ifdef _WIN32
- flags |= O_BINARY;
-#endif
- mode_t mode = 0666;
- code = file.create("bench_mark_cpp.tsfile", flags, mode);
- if (code != common::E_OK) {
- return -1;
- }
-
- TsFileTableWriter writer = new TsFileTableWriter
-}
diff --git a/cpp/bench_mark/bench_mark_src/bench_conf.h
b/cpp/bench_mark/src/bench_conf.h
similarity index 72%
rename from cpp/bench_mark/bench_mark_src/bench_conf.h
rename to cpp/bench_mark/src/bench_conf.h
index ef0b39e6..98ddc35e 100644
--- a/cpp/bench_mark/bench_mark_src/bench_conf.h
+++ b/cpp/bench_mark/src/bench_conf.h
@@ -17,12 +17,17 @@
* under the License.
*/
+#ifndef TSFILE_BENCH_MARK_BENCH_CONF_H
+#define TSFILE_BENCH_MARK_BENCH_CONF_H
+
#include <vector>
namespace bench {
-int tablet_num = 1000;
-int tablet_row_num = 10000;
-int tag_num = 2;
-std::vector<float> diversity_rate = {0.5, 0.5};
-std::vector<int> field_type_vector = {1, 1, 1, 1, 1};
+static int tablet_num = 1000; // Number of tablets the benchmark writes. `static` in a header: every translation unit that includes it gets its own copy.
+static int tablet_row_num = 10000; // Rows per tablet.
+static int tag_num = 2; // Number of TAG columns in the generated schema.
+static std::vector<float> diversity_rate = {0.5f, 0.5f}; // Per-tag fractions; bench_mark_cpp.cc scales tablet_row_num by [0] and then by [1] to get the tag batch sizes.
+static std::vector<int> field_type_vector = {1, 1, 1, 1, 1}; // Entry i is the number of FIELD columns created for bench::data_type[i].
} // namespace bench
+
+#endif // TSFILE_BENCH_MARK_BENCH_CONF_H
diff --git a/cpp/bench_mark/bench_mark_src/bench_mark.h
b/cpp/bench_mark/src/bench_mark.h
similarity index 60%
rename from cpp/bench_mark/bench_mark_src/bench_mark.h
rename to cpp/bench_mark/src/bench_mark.h
index fa31b8c0..6fcb64ea 100644
--- a/cpp/bench_mark/bench_mark_src/bench_mark.h
+++ b/cpp/bench_mark/src/bench_mark.h
@@ -4,33 +4,42 @@
#ifndef TSFILE_BENCH_MARK_BENCH_MARK_H
#define TSFILE_BENCH_MARK_BENCH_MARK_H
+#include <iostream>
+#include "bench_conf.h"
namespace bench {
-const char* data_types_name[5] = {
+static const char* data_types_name[5] = { // Display names, parallel to data_type below.
"BOOLEAN", "INT32", "INT64", "FLOAT", "DOUBLE"
-}
+};
-const int data_type[5] = {
+static const int data_type[5] = { // Numeric type codes; bench_mark_cpp.cc casts them to TSDataType. Presumably they match that enum's values - confirm.
0, 1, 2, 3, 4
};
+static int column_num = 0; // Total column count, (re)computed by print_config(). `static` in a header: one copy per translation unit.
+
inline void print_config () { // Prints the benchmark configuration to std::cout and recomputes column_num as a side effect.
std::cout << "TsFile CPP benchmark" << std::endl;
std::cout << "Schema Configuration:" << std::endl;
std::cout << "Tag Column num: " << bench::tag_num << std::endl;
std::cout << "Diversity rate of tag :" << std::endl;
- std::cout << bench::repetition_rate[0];
- for (int i = 0; i < bench::tag_num; i++) {
- std::cout << ":"<< bench::repetition_rate[i];
+ std::cout << bench::diversity_rate[0]; // NOTE(review): element 0 is read unconditionally; an empty diversity_rate is not handled.
+ for (int i = 1; i < bench::tag_num; i++) { // Remaining rates, ':'-separated. Assumes diversity_rate has at least tag_num entries.
+ std::cout << ":"<< bench::diversity_rate[i];
}
std::cout<<std::endl;
std::cout << "Filed Column and types: " << std::endl;
+ column_num = 0;
+ column_num += diversity_rate.size(); // Tag columns are counted via diversity_rate.size(), not tag_num. NOTE(review): the two can disagree.
for (int i = 0; i < 5; i++) { // Fixed bound of 5 matches the arrays above; assumes field_type_vector has at least 5 entries.
- std::cout << bench::data_types_name[i] << "x" <<
bench::field_type_vector[i] << "\t" << std::endl;
+ std::cout << bench::data_types_name[i] << "x" <<
bench::field_type_vector[i] << " ";
+ column_num += field_type_vector[i]; // Add the number of field columns of this type.
}
+ std::cout << std::endl;
std::cout << "Tablet num:" << bench::tablet_num << std::endl;
std::cout << "Tablet row num:" << bench::tablet_row_num << std::endl;
+ std::cout << "Total points is " << tablet_num * tablet_row_num * // NOTE(review): the product is computed in int and can overflow for larger configurations.
column_num<< std::endl;
}
}
diff --git a/cpp/bench_mark/bench_mark_src/bench_mark_c.cpp
b/cpp/bench_mark/src/bench_mark_c.cc
similarity index 59%
rename from cpp/bench_mark/bench_mark_src/bench_mark_c.cpp
rename to cpp/bench_mark/src/bench_mark_c.cc
index b52c9cff..b9988fb0 100644
--- a/cpp/bench_mark/bench_mark_src/bench_mark_c.cpp
+++ b/cpp/bench_mark/src/bench_mark_c.cc
@@ -2,4 +2,4 @@
// Created by colin on 4/2/25.
//
-#include "bench_mark_c.h"
+#include "bench_mark.h"
diff --git a/cpp/bench_mark/src/bench_mark_cpp.cc
b/cpp/bench_mark/src/bench_mark_cpp.cc
new file mode 100644
index 00000000..f02620e3
--- /dev/null
+++ b/cpp/bench_mark/src/bench_mark_cpp.cc
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <sys/types.h>
+
+#include <chrono>
+#include <cmath>
+#include <iostream>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "bench_conf.h"
+#include "bench_mark.h"
+#include "common/db_common.h"
+#include "common/path.h"
+#include "common/tablet.h"
+#include "file/write_file.h"
+#include "utils/db_utils.h"
+#include "writer/tsfile_table_writer.h"
+
+using namespace storage;
+using namespace common;
+
+std::vector<std::string> columns_name;
+std::vector<TSDataType> data_types;
+
+void printProgressBar(int current, int total, int barWidth = 50) { // Draws a one-line text progress bar for current/total on std::cout, barWidth cells wide.
+ float progress = static_cast<float>(current) / total; // Completed fraction. NOTE(review): total is not checked for zero.
+ int pos = barWidth * progress; // Index of the head cell; the float product is truncated to int.
+
+ std::cout << "[";
+ for (int i = 0; i < barWidth; ++i) {
+ if (i < pos)
+ std::cout << "="; // Cells before the head are filled.
+ else if (i == pos)
+ std::cout << ">"; // The head marker.
+ else
+ std::cout << " "; // Cells after the head are left blank.
+ }
+ std::cout << "] " << int(progress * 100.0) << " %\r"; // Integer percentage, then a carriage return instead of a newline (presumably so the next call redraws the same line).
+ std::cout.flush(); // Explicit flush, since no std::endl is written.
+}
+
+TableSchema* gen_table_schema(int tag_num, // Builds the "TestTable" schema: tag_num STRING tag columns, then field columns as described by field_type_vector.
+ const std::vector<int>& field_type_vector) { // Entry i is the number of FIELD columns of type bench::data_type[i]. Returns a new-allocated schema; the caller deletes it.
+ std::vector<common::ColumnSchema> schemas;
+ for (int i = 0; i < tag_num; i++) { // Tag columns are named TAG0, TAG1, ...
+ std::string column_name = std::string("TAG" + std::to_string(i));
+ schemas.emplace_back(column_name, common::TSDataType::STRING,
+ common::ColumnCategory::TAG);
+ columns_name.push_back(column_name); // Side effect: also recorded in the file-level columns_name / data_types vectors.
+ data_types.push_back(TSDataType::STRING); // NOTE(review): the globals are appended to, never cleared, so a second call would duplicate entries.
+ }
+ for (int i = 0; i < field_type_vector.size(); i++) { // NOTE(review): signed/unsigned comparison; i also indexes bench::data_type, which has 5 entries.
+ int column_num = field_type_vector[i]; // Local count for this type; shadows nothing here but shares its name with bench::column_num.
+ for (int j = 0; j < column_num; j++) {
+ std::string column_name =
+ std::string("FIELD" + std::to_string(i) + std::to_string(j)); // Name is "FIELD" + type index + per-type ordinal.
+ TSDataType data_type =
static_cast<TSDataType>(bench::data_type[i]);
+ data_types.push_back(data_type);
+ columns_name.push_back(column_name);
+ schemas.emplace_back(column_name, data_type,
ColumnCategory::FIELD);
+ }
+ }
+ return new TableSchema("TestTable", schemas); // Ownership passes to the caller.
+}
+
+int main() { // Write benchmark: generates bench::tablet_num tablets, writes them to bench_mark_cpp.tsfile, then reports file size, timings and throughput.
+ int code = common::E_OK;
+ bench::print_config(); // Also computes bench::column_num, which the throughput line at the end relies on.
+ common::init_config_value();
+ // benchmark for write
+ storage::WriteFile file = storage::WriteFile();
+ int flags = O_WRONLY | O_CREAT | O_TRUNC; // NOTE(review): the O_* flags come from <fcntl.h>, which this file does not include directly; presumably reached through a project header - confirm.
+#ifdef _WIN32
+ flags |= O_BINARY;
+#endif
+ mode_t mode = 0666;
+ code = file.create("bench_mark_cpp.tsfile", flags, mode);
+ if (code != common::E_OK) {
+ return -1; // Nothing has been allocated yet, so a bare return is enough.
+ }
+
+ TableSchema* table_schema =
+ gen_table_schema(bench::tag_num, bench::field_type_vector);
+ auto writer = new TsFileTableWriter(&file, table_schema);
+ delete (table_schema); // Freed right after the writer is constructed; presumably the writer keeps its own copy - confirm.
+ int64_t timestamp = 0; // Never reset, so timestamps keep increasing across tablets.
+ auto start = std::chrono::high_resolution_clock::now(); // Start of the "total time" measurement (taken after file/schema/writer setup).
+ int64_t prepare_time = 0; // Accumulated microseconds spent building tablets.
+ int64_t writing_time = 0; // Accumulated microseconds spent in write_table() and close().
+ int batch_num = bench::tablet_row_num * bench::diversity_rate[0]; // Row threshold for switching the tag1 value; float product truncated to int.
+ int batch_num2 = batch_num * bench::diversity_rate[1]; // Row threshold for switching the tag2 value.
+ for (int i = 0; i < bench::tablet_num; i++) {
+ printProgressBar(i, bench::tablet_num);
+ auto tablet_start = std::chrono::high_resolution_clock::now(); // Tablet allocation below is included in prepare_time.
+ auto* tablet =
+ new Tablet(columns_name, data_types, bench::tablet_row_num);
+ int tag1_num = 0; // Tag counters restart at 0 for every tablet.
+ int tag2_num = 0;
+ int tag1_row_num = 0;
+ int tag2_row_num = 0;
+ for (int j = 0; j < bench::tablet_row_num; j++) {
+ if (tag1_row_num > batch_num) { // NOTE(review): with '>' the switch happens after batch_num + 1 rows, not batch_num; confirm whether '>=' was intended.
+ tag1_row_num = 0;
+ tag1_num++;
+ tag2_row_num = 0; // A new tag1 value restarts the tag2 sequence.
+ tag2_num = 0;
+ }
+ tablet->add_timestamp(j, timestamp++); // Post-increment: the field values written below see the row's timestamp + 1.
+ tablet->add_value(
+ j, 0, std::string("tag1_" + std::to_string(tag1_num)).c_str()); // Column 0 is hard-coded as the first tag column.
+ tag1_row_num++;
+ if (tag2_row_num > batch_num2) { // Same '>' pattern as above: batch_num2 + 1 rows per tag2 value.
+ tag2_row_num = 0;
+ tag2_num++;
+ }
+ tablet->add_value(
+ j, 1, std::string("tag2_" + std::to_string(tag2_num)).c_str()); // Column 1 is hard-coded as the second tag column.
+ tag2_row_num++;
+ for (int col = 2; col < data_types.size(); col++) { // Field columns start at index 2. NOTE(review): assumes bench::tag_num == 2; also a signed/unsigned comparison.
+ switch (data_types[col]) { // Derive a value of the column's type from the running timestamp.
+ case TSDataType::INT64:
+ tablet->add_value(j, col,
+ static_cast<int64_t>(timestamp));
+ break;
+ case TSDataType::FLOAT:
+ tablet->add_value(j, col,
+ static_cast<float>(timestamp * 1.1));
+ break;
+ case TSDataType::DOUBLE:
+ tablet->add_value(j, col,
+ static_cast<double>(timestamp *
1.1));
+ break;
+ case TSDataType::INT32:
+ tablet->add_value(j, col,
+ static_cast<int32_t>(timestamp)); // Narrowing from int64_t.
+ break;
+ case TSDataType::BOOLEAN:
+ tablet->add_value(j, col,
+ static_cast<bool>(timestamp % 2));
+ break;
+ default:
+ // NOTE(review): this label has no following statement; a label directly before '}' is ill-formed before C++23 and the build uses -std=c++11. Needs a `break;`.
+ }
+ }
+ }
+ auto tablet_end = std::chrono::high_resolution_clock::now();
+ auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
+ tablet_end - tablet_start);
+ prepare_time += duration.count();
+ auto write_start = std::chrono::high_resolution_clock::now();
+ writer->write_table(*tablet); // NOTE(review): the result of write_table() is not checked.
+ auto write_end = std::chrono::high_resolution_clock::now();
+ writing_time += std::chrono::duration_cast<std::chrono::microseconds>(
+ write_end - write_start)
+ .count();
+ delete tablet;
+ }
+ auto close_start = std::chrono::high_resolution_clock::now();
+ writer->close(); // Counted as writing time. NOTE(review): the result of close() is not checked.
+ auto close_end = std::chrono::high_resolution_clock::now();
+ writing_time += std::chrono::duration_cast<std::chrono::microseconds>(
+ close_end - close_start)
+ .count();
+ delete writer;
+ auto end = std::chrono::high_resolution_clock::now(); // End of the "total time" measurement.
+
+ FILE* file_to_size = fopen("bench_mark_cpp.tsfile", "rb"); // Reopen the output only to measure its size.
+ if (!file_to_size) {
+ std::cout << "unable to open file" << std::endl;
+ return -1;
+ }
+ fseeko(file_to_size, 0, SEEK_END); // Seek to the end so ftello() reports the file length.
+ off_t size = ftello(file_to_size);
+ fclose(file_to_size);
+ std::cout << "finish bench mark for cpp" << std::endl;
+ std::cout << "tsfile size is " << size << " bytes " << " ~ " << size / 1024 // Integer division: the KB figure is truncated.
+ << "KB" << std::endl;
+ float pre_time = prepare_time / 1000.0 / 1000.0; // Microseconds to seconds.
+ float write_time = writing_time / 1000.0 / 1000.0;
+ std::cout << "prepare data time is " << pre_time << " s" << std::endl;
+ std::cout << "writing data time is " << write_time << " s" << std::endl;
+ std::cout << "writing speed is "
+ << static_cast<long long>(bench::tablet_num * // NOTE(review): the point count is multiplied in int before the division and can overflow for larger configurations.
bench::tablet_row_num * bench::column_num /
+ (pre_time + write_time)) // Throughput is measured over prepare + write time, not the wall-clock total.
+ << " points/s" << std::endl;
+ std::cout << "total time is "
+ << std::chrono::duration_cast<std::chrono::microseconds>(end -
+ start)
+ .count() /
+ 1000.0 / 1000.0
+ << " s" << std::endl;
+}
diff --git a/cpp/src/writer/time_chunk_writer.cc
b/cpp/src/writer/time_chunk_writer.cc
index f5b7b240..91329884 100644
--- a/cpp/src/writer/time_chunk_writer.cc
+++ b/cpp/src/writer/time_chunk_writer.cc
@@ -57,6 +57,8 @@ void TimeChunkWriter::reset() {
}
if (first_page_statistic_ != nullptr) {
first_page_statistic_->reset();
+ } else {
+ first_page_statistic_ =
StatisticFactory::alloc_statistic(common::VECTOR);
}
time_page_writer_.reset();
chunk_header_.reset();
@@ -98,6 +100,9 @@ int TimeChunkWriter::seal_cur_page(bool end_chunk) {
time_page_writer_.destroy_page_data();
time_page_writer_.reset();
} else {
+ if (first_page_statistic_ == nullptr) {
+ std::cout<<"error"<<std::endl;
+ }
/*
* if the chunk has only one page, do not writer page statistic.
* so we save the data of first page and see if the chunk has more
diff --git a/cpp/src/writer/value_chunk_writer.cc
b/cpp/src/writer/value_chunk_writer.cc
index e29f2565..bacb958d 100644
--- a/cpp/src/writer/value_chunk_writer.cc
+++ b/cpp/src/writer/value_chunk_writer.cc
@@ -58,6 +58,8 @@ void ValueChunkWriter::reset() {
}
if (first_page_statistic_ != nullptr) {
first_page_statistic_->reset();
+ } else {
+ first_page_statistic_ = StatisticFactory::alloc_statistic(data_type_);
}
value_page_writer_.reset();
chunk_header_.reset();