This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new eb96726  ARROW-5696: [C++][Gandiva] Introduce castVarcharVarchar
eb96726 is described below

commit eb9672686f6f0e5c24fd47de963afc6ceb0611cc
Author: Praveen <prav...@dremio.com>
AuthorDate: Mon Jun 24 19:46:35 2019 -0500

    ARROW-5696: [C++][Gandiva] Introduce castVarcharVarchar
    
    Support cast varchar to varchar with specified lengths.
    
    Author: Praveen <prav...@dremio.com>
    
    Closes #4663 from praveenbingo/castVarChar and squashes the following commits:
    
    0dbfeec5a <Praveen> Fix lint.
    b79e83b7f <Praveen> fix windows failures.
    b40762e27 <Praveen> Fix lint issues.
    5d1526dad <Praveen> Fix review comments.
    afd8a48ba <Praveen> Fix lint issues.
    331d7df61 <Praveen> Introduce castVarcharVarchar
---
 cpp/src/gandiva/function_registry_string.cc |  4 +++
 cpp/src/gandiva/precompiled/string_ops.cc   | 13 ++++++++
 cpp/src/gandiva/tests/utf8_test.cc          | 48 +++++++++++++++++++++++++++++
 3 files changed, 65 insertions(+)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index b00faf4..19e31c8 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -57,6 +57,10 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
       NativeFunction("upper", DataTypeVector{utf8()}, utf8(), 
kResultNullIfNull,
                      "upper_utf8", NativeFunction::kNeedsContext),
 
+      NativeFunction("castVARCHAR", DataTypeVector{utf8(), int64()}, utf8(),
+                     kResultNullIfNull, "castVARCHAR_utf8_int64",
+                     NativeFunction::kNeedsContext),
+
       NativeFunction("like", DataTypeVector{utf8(), utf8()}, boolean(), 
kResultNullIfNull,
                      "gdv_fn_like_utf8_utf8", 
NativeFunction::kNeedsFunctionHolder)};
 
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index a0f2c19..e65ca2d 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -160,6 +160,19 @@ char* upper_utf8(int64 context, const char* data, int32 data_len, int32_t* out_l
   return ret;
 }
 
+// Truncates the string to at most out_len bytes.
+//
+// context    - execution context passed to gdv_fn_context_arena_malloc, which
+//              provides the output buffer.
+// data       - input bytes.
+// data_len   - number of input bytes.
+// out_len    - requested maximum length; a value <= 0 yields an empty result and
+//              a value >= data_len copies the input unchanged.
+// out_length - receives the number of bytes in the returned buffer.
+//
+// Returns a newly allocated buffer holding the first *out_length bytes of data.
+//
+// NOTE(review): the cut is made on a byte boundary, so a multi-byte UTF-8
+// sequence can be split when out_len falls inside it.
+FORCE_INLINE
+char* castVARCHAR_utf8_int64(int64 context, const char* data, int32 data_len,
+                             int64_t out_len, int32_t* out_length) {
+  // Clamp in 64-bit space before narrowing: casting out_len straight to int32_t
+  // would turn a negative or > INT32_MAX request into a bogus (possibly
+  // negative) length that is then handed to the allocator and to memcpy.
+  int64_t limit = out_len < 0 ? 0 : out_len;
+  // The narrowing cast is only reached when limit < data_len, so it fits.
+  int32_t len = data_len <= limit ? data_len : static_cast<int32_t>(limit);
+  // TODO: handle allocation failures
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, len));
+  memcpy(ret, data, len);
+  *out_length = len;
+  return ret;
+}
+
 #define IS_NULL(NAME, TYPE) \
   FORCE_INLINE              \
   bool NAME##_##TYPE(TYPE in, int32 len, boolean is_valid) { return !is_valid; 
}
diff --git a/cpp/src/gandiva/tests/utf8_test.cc b/cpp/src/gandiva/tests/utf8_test.cc
index 6df4da6..ea9a76c 100644
--- a/cpp/src/gandiva/tests/utf8_test.cc
+++ b/cpp/src/gandiva/tests/utf8_test.cc
@@ -521,4 +521,52 @@ TEST_F(TestUtf8, TestVarlenOutput) {
                 Projector::Make(schema, {expr}, TestConfiguration(), 
&projector));
 }
 
+// Checks castVARCHAR(utf8, int64): column "a" is truncated to a length of 10 and
+// compared for equality against the expected strings in column "c".
+TEST_F(TestUtf8, TestCastVarChar) {
+  // schema for input fields
+  auto field_a = field("a", utf8());
+  auto field_c = field("c", utf8());
+  auto schema = arrow::schema({field_a, field_c});
+
+  // output fields
+  auto res = field("res", boolean());
+
+  // build expressions.
+  auto node_a = TreeExprBuilder::MakeField(field_a);
+  auto node_c = TreeExprBuilder::MakeField(field_c);
+  // maximum length the input string is truncated to
+  auto node_len = TreeExprBuilder::MakeLiteral(static_cast<int64_t>(10));
+  auto cast_varchar =
+      TreeExprBuilder::MakeFunction("castVARCHAR", {node_a, node_len}, utf8());
+  auto equals =
+      TreeExprBuilder::MakeFunction("equal", {cast_varchar, node_c}, boolean());
+  auto expr = TreeExprBuilder::MakeExpression(equals, res);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
+  // Fatal assertion: projector is dereferenced below, so the test must stop
+  // here if it could not be built.
+  ASSERT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 5;
+  // Input strings; the third entry is marked null.
+  auto array_a = MakeArrowArrayUtf8(
+      {"park", "Sparkle", "bright spark and fire", "fiery SPARK", "मदन"},
+      {true, true, false, true, true});
+
+  // Expected strings after the cast; "fiery SPARK" loses its last character.
+  auto array_c =
+      MakeArrowArrayUtf8({"park", "Sparkle", "bright spar", "fiery SPAR", "मदन"},
+                         {true, true, true, true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a, array_c});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  // Fatal assertion: outputs[0] is read below and is only expected to exist
+  // after a successful evaluation.
+  ASSERT_TRUE(status.ok()) << status.message();
+
+  // The null input row is expected to produce a null result.
+  auto exp = MakeArrowArrayBool({true, true, false, true, true},
+                                {true, true, false, true, true});
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs[0]);
+}
+
 }  // namespace gandiva

Reply via email to