This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new eb96726 ARROW-5696: [C++][Gandiva] Introduce castVarcharVarchar eb96726 is described below commit eb9672686f6f0e5c24fd47de963afc6ceb0611cc Author: Praveen <prav...@dremio.com> AuthorDate: Mon Jun 24 19:46:35 2019 -0500 ARROW-5696: [C++][Gandiva] Introduce castVarcharVarchar Support cast varchar to varchar with specified lengths. Author: Praveen <prav...@dremio.com> Closes #4663 from praveenbingo/castVarChar and squashes the following commits: 0dbfeec5a <Praveen> Fix lint. b79e83b7f <Praveen> fix windows failures. b40762e27 <Praveen> Fix lint issues. 5d1526dad <Praveen> Fix review comments. afd8a48ba <Praveen> Fix lint issues. 331d7df61 <Praveen> Introduce castVarcharVarchar --- cpp/src/gandiva/function_registry_string.cc | 4 +++ cpp/src/gandiva/precompiled/string_ops.cc | 13 ++++++++ cpp/src/gandiva/tests/utf8_test.cc | 48 +++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc index b00faf4..19e31c8 100644 --- a/cpp/src/gandiva/function_registry_string.cc +++ b/cpp/src/gandiva/function_registry_string.cc @@ -57,6 +57,10 @@ std::vector<NativeFunction> GetStringFunctionRegistry() { NativeFunction("upper", DataTypeVector{utf8()}, utf8(), kResultNullIfNull, "upper_utf8", NativeFunction::kNeedsContext), + NativeFunction("castVARCHAR", DataTypeVector{utf8(), int64()}, utf8(), + kResultNullIfNull, "castVARCHAR_utf8_int64", + NativeFunction::kNeedsContext), + NativeFunction("like", DataTypeVector{utf8(), utf8()}, boolean(), kResultNullIfNull, "gdv_fn_like_utf8_utf8", NativeFunction::kNeedsFunctionHolder)}; diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc index a0f2c19..e65ca2d 100644 --- a/cpp/src/gandiva/precompiled/string_ops.cc +++ b/cpp/src/gandiva/precompiled/string_ops.cc @@ -160,6 +160,19 @@ char* upper_utf8(int64 context, const char* data, int32 data_len, int32_t* out_l return ret; } +// Truncates the string to given length +FORCE_INLINE +char* castVARCHAR_utf8_int64(int64 context, const char* data, int32 data_len, + int64_t out_len, int32_t* out_length) { + // TODO: handle allocation failures + int32_t len = data_len <= static_cast<int32_t>(out_len) ? data_len + : static_cast<int32_t>(out_len); + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, len)); + memcpy(ret, data, len); + *out_length = len; + return ret; +} + #define IS_NULL(NAME, TYPE) \ FORCE_INLINE \ bool NAME##_##TYPE(TYPE in, int32 len, boolean is_valid) { return !is_valid; } diff --git a/cpp/src/gandiva/tests/utf8_test.cc b/cpp/src/gandiva/tests/utf8_test.cc index 6df4da6..ea9a76c 100644 --- a/cpp/src/gandiva/tests/utf8_test.cc +++ b/cpp/src/gandiva/tests/utf8_test.cc @@ -521,4 +521,52 @@ TEST_F(TestUtf8, TestVarlenOutput) { Projector::Make(schema, {expr}, TestConfiguration(), &projector)); } +TEST_F(TestUtf8, TestCastVarChar) { + // schema for input fields + auto field_a = field("a", utf8()); + auto field_c = field("c", utf8()); + auto schema = arrow::schema({field_a, field_c}); + + // output fields + auto res = field("res", boolean()); + + // build expressions. + auto node_a = TreeExprBuilder::MakeField(field_a); + auto node_c = TreeExprBuilder::MakeField(field_c); + // truncates the string to input length + auto node_b = TreeExprBuilder::MakeLiteral(static_cast<int64_t>(10)); + auto cast_varchar = + TreeExprBuilder::MakeFunction("castVARCHAR", {node_a, node_b}, utf8()); + auto equals = TreeExprBuilder::MakeFunction("equal", {cast_varchar, node_c}, boolean()); + auto expr = TreeExprBuilder::MakeExpression(equals, res); + + // Build a projector for the expressions. + std::shared_ptr<Projector> projector; + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()) << status.message(); + + // Create a row-batch with some sample data + int num_records = 5; + auto array_a = MakeArrowArrayUtf8( + {"park", "Sparkle", "bright spark and fire", "fiery SPARK", "मदन"}, + {true, true, false, true, true}); + + auto array_b = + MakeArrowArrayUtf8({"park", "Sparkle", "bright spar", "fiery SPAR", "मदन"}, + {true, true, true, true, true}); + + // prepare input record batch + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a, array_b}); + + // Evaluate expression + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + EXPECT_TRUE(status.ok()) << status.message(); + + auto exp = MakeArrowArrayBool({true, true, false, true, true}, + {true, true, false, true, true}); + // Validate results + EXPECT_ARROW_ARRAY_EQUALS(exp, outputs[0]); +} + } // namespace gandiva