[ https://issues.apache.org/jira/browse/ARROW-17721?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Jiangtao Peng resolved ARROW-17721. ----------------------------------- Resolution: Fixed > [C++][Gandiva] Expression Evaluation Performance Improvement using Mimalloc > --------------------------------------------------------------------------- > > Key: ARROW-17721 > URL: https://issues.apache.org/jira/browse/ARROW-17721 > Project: Apache Arrow > Issue Type: Improvement > Components: C++ - Gandiva > Reporter: Jiangtao Peng > Assignee: Jin Shang > Priority: Major > > Arrow uses jemalloc as its default memory allocator. For some reason, I am going > to use mimalloc instead. But there seems to be a big performance difference > between the two memory allocators. > Here are my steps. > I use simple compile options: > {code:java} > -DCMAKE_BUILD_TYPE=debug > -DARROW_JEMALLOC=OFF|ON > -DARROW_MIMALLOC=ON|OFF > -DARROW_GANDIVA=ON > -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON > -DARROW_BUILD_TESTS=ON > {code} > > Then I write a simple test case: > {code:cpp} > #include <gtest/gtest.h> > #include "arrow/memory_pool.h" > #include "arrow/status.h" > #include "gandiva/projector.h" > #include "gandiva/tests/test_util.h" > #include "gandiva/tree_expr_builder.h" > #include <chrono> > #include <iostream> > namespace gandiva { > using arrow::boolean; > using arrow::date64; > using arrow::int32; > using arrow::int64; > using arrow::utf8; > class TestUtf8Perf : public ::testing::Test { > public: > void SetUp() { pool_ = arrow::default_memory_pool(); } > protected: > arrow::MemoryPool* pool_; > }; > void TestPerf(int64_t char_length, int64_t num_records) { > // schema for input fields > auto field_a = field("a", utf8()); > auto schema = arrow::schema({field_a}); > // output fields > auto res = field("res", utf8()); > auto node_a = TreeExprBuilder::MakeField(field_a); > auto upper_a = TreeExprBuilder::MakeFunction("upper", {node_a}, utf8()); > auto expr = TreeExprBuilder::MakeExpression(upper_a, res); > // Build a projector for the expressions. 
> std::shared_ptr<Projector> projector; > auto status = Projector::Make(schema, {expr}, TestConfiguration(), > &projector); > EXPECT_TRUE(status.ok()) << status.message(); > std::string val = std::string(char_length, 'a'); > arrow::StringBuilder builder; > for (int i = 0; i < num_records; i++) { > auto _ = builder.Append(val); > } > std::shared_ptr<arrow::StringArray> array_a; > auto _ = builder.Finish(&array_a); > // prepare input record batch > auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a}); > auto start_epoch = std::chrono::duration_cast<std::chrono::milliseconds>( > std::chrono::system_clock::now().time_since_epoch()) > .count(); > // Evaluate expression > arrow::ArrayVector outputs; > status = projector->Evaluate(*in_batch, pool_, &outputs); > EXPECT_TRUE(status.ok()) << status.message(); > std::cout << std::chrono::duration_cast<std::chrono::milliseconds>( > std::chrono::system_clock::now().time_since_epoch()) > .count() - > start_epoch > << "ms" << std::endl; > } > TEST_F(TestUtf8Perf, TestMemoryAllocsPerf) { > TestPerf(20, 10000); > TestPerf(20, 100000); > TestPerf(200, 10000); > TestPerf(200, 100000); > TestPerf(2000, 10000); > } > } // namespace gandiva > {code} > This case evaluates the expression {*}upper(a){*}, where *a* has string > lengths of 20/200/2000. Evaluation time results are: > |char_length|num_records|Using Mimalloc (ms)|Using Jemalloc (ms)| > |20|10000|29|3| > |20|100000|2686|26| > |200|10000|954|11| > |200|100000|220153|118| > |2000|10000|21162|89| > > Is this performance gap expected? Or are there any other compile options I should note? > How can I improve performance when using mimalloc? -- This message was sent by Atlassian Jira (v8.20.10#820010)