anthonylouisbsb commented on a change in pull request #9707:
URL: https://github.com/apache/arrow/pull/9707#discussion_r597711349
##########
File path: cpp/src/gandiva/tests/hash_test.cc
##########
@@ -147,4 +153,252 @@ TEST_F(TestHash, TestBuf) {
}
}
+TEST_F(TestHash, TestSha256Simple) {
+ // schema for input fields
+ auto field_a = field("a", int32());
+ auto field_b = field("b", int64());
+ auto field_c = field("c", float32());
+ auto field_d = field("d", float64());
+ auto schema = arrow::schema({field_a, field_b, field_c, field_d});
+
+ // output fields
+ auto res_0 = field("res0", utf8());
+ auto res_1 = field("res1", utf8());
+ auto res_2 = field("res2", utf8());
+ auto res_3 = field("res3", utf8());
+
+ // build expressions.
+ // hashSHA256(a)
+ auto node_a = TreeExprBuilder::MakeField(field_a);
+ auto hashSha256_1 = TreeExprBuilder::MakeFunction("hashSHA256",
+
{node_a}, utf8());
+ auto expr_0 = TreeExprBuilder::MakeExpression(hashSha256_1, res_0);
+
+ auto node_b = TreeExprBuilder::MakeField(field_b);
+ auto hashSha256_2 = TreeExprBuilder::MakeFunction("hashSHA256",
+
{node_b}, utf8());
+ auto expr_1 = TreeExprBuilder::MakeExpression(hashSha256_2, res_1);
+
+ auto node_c = TreeExprBuilder::MakeField(field_c);
+ auto hashSha256_3 = TreeExprBuilder::MakeFunction("hashSHA256",
+
{node_c}, utf8());
+ auto expr_2 = TreeExprBuilder::MakeExpression(hashSha256_3, res_2);
+
+ auto node_d = TreeExprBuilder::MakeField(field_d);
+ auto hashSha256_4 = TreeExprBuilder::MakeFunction("hashSHA256",
+
{node_d}, utf8());
+ auto expr_3 = TreeExprBuilder::MakeExpression(hashSha256_4, res_3);
+
+ // Build a projector for the expressions.
+ std::shared_ptr<Projector> projector;
+ auto status =
+ Projector::Make(schema, {expr_0, expr_1, expr_2, expr_3},
+ TestConfiguration(), &projector);
+ EXPECT_TRUE(status.ok()) << status.message();
+
+ // Create a row-batch with some sample data
+ int num_records = 2;
+ auto validity_array = {false, true};
+
+ auto array_int32 =
+ MakeArrowArrayInt32({1, 0}, validity_array);
+
+ auto array_int64 =
+ MakeArrowArrayInt64({1, 0}, validity_array);
+
+ auto array_float32 =
+ MakeArrowArrayFloat32({1.0, 0.0}, validity_array);
+
+ auto array_float64 =
+ MakeArrowArrayFloat64({1.0, 0.0}, validity_array);
+
+ // prepare input record batch
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records,
+
{array_int32, array_int64,
+ array_float32, array_float64});
+
+ // Evaluate expression
+ arrow::ArrayVector outputs;
+ status = projector->Evaluate(*in_batch, pool_, &outputs);
+ EXPECT_TRUE(status.ok());
+
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(0), outputs.at(1));
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(1), outputs.at(2));
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(2), outputs.at(3));
+}
+
+TEST_F(TestHash, TestSha256Varlen) {
+ // schema for input fields
+ auto field_a = field("a", utf8());
+ auto schema = arrow::schema({field_a});
+
+ // output fields
+ auto res_0 = field("res0", utf8());
+
+ // build expressions.
+ // hashSHA256(a)
+ auto node_a = TreeExprBuilder::MakeField(field_a);
+ auto hashSha256 = TreeExprBuilder::MakeFunction("hashSHA256",
+
{node_a}, utf8());
+ auto expr_0 = TreeExprBuilder::MakeExpression(hashSha256, res_0);
+
+ // Build a projector for the expressions.
+ std::shared_ptr<Projector> projector;
+ auto status =
+ Projector::Make(schema, {expr_0}, TestConfiguration(), &projector);
+ EXPECT_TRUE(status.ok()) << status.message();
+
+ // Create a row-batch with some sample data
+ int num_records = 3;
+
+ std::string first_string = "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY "
+ "[ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]";
+ std::string second_string = "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY "
+ "[ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
+
+ auto array_a =
+ MakeArrowArrayUtf8({"foo", first_string, second_string}, {false, true,
true});
+
+ // prepare input record batch
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
+
+ // Evaluate expression
+ arrow::ArrayVector outputs;
+ status = projector->Evaluate(*in_batch, pool_, &outputs);
+ EXPECT_TRUE(status.ok());
+
+ auto response = outputs.at(0);
+ EXPECT_EQ(response->null_count(), 0);
+ EXPECT_EQ(response->GetScalar(0).ValueOrDie()->ToString(), "");
+ for (int i = 1; i < num_records; ++i) {
+ const auto &value_at_position =
response->GetScalar(i).ValueOrDie()->ToString();
+ EXPECT_NE(value_at_position,
+ response->GetScalar(i - 1).ValueOrDie()->ToString());
+ }
+}
+
+TEST_F(TestHash, TestSha128Simple) {
+ // schema for input fields
+ auto field_a = field("a", int32());
+ auto field_b = field("b", int64());
+ auto field_c = field("c", float32());
+ auto field_d = field("d", float64());
+ auto schema = arrow::schema({field_a, field_b, field_c, field_d});
+
+ // output fields
+ auto res_0 = field("res0", utf8());
+ auto res_1 = field("res1", utf8());
+ auto res_2 = field("res2", utf8());
+ auto res_3 = field("res3", utf8());
+
+ // build expressions.
+ // hashSHA128(a)
+ auto node_a = TreeExprBuilder::MakeField(field_a);
+ auto hashSha128_1 = TreeExprBuilder::MakeFunction("hashSHA128",
+
{node_a}, utf8());
+ auto expr_0 = TreeExprBuilder::MakeExpression(hashSha128_1, res_0);
+
+ auto node_b = TreeExprBuilder::MakeField(field_b);
+ auto hashSha128_2 = TreeExprBuilder::MakeFunction("hashSHA128",
+
{node_b}, utf8());
+ auto expr_1 = TreeExprBuilder::MakeExpression(hashSha128_2, res_1);
+
+ auto node_c = TreeExprBuilder::MakeField(field_c);
+ auto hashSha128_3 = TreeExprBuilder::MakeFunction("hashSHA128",
+
{node_c}, utf8());
+ auto expr_2 = TreeExprBuilder::MakeExpression(hashSha128_3, res_2);
+
+ auto node_d = TreeExprBuilder::MakeField(field_d);
+ auto hashSha128_4 = TreeExprBuilder::MakeFunction("hashSHA128",
+
{node_d}, utf8());
+ auto expr_3 = TreeExprBuilder::MakeExpression(hashSha128_4, res_3);
+
+ // Build a projector for the expressions.
+ std::shared_ptr<Projector> projector;
+ auto status =
+ Projector::Make(schema, {expr_0, expr_1, expr_2, expr_3},
+ TestConfiguration(),
&projector);
+ EXPECT_TRUE(status.ok()) << status.message();
+
+ // Create a row-batch with some sample data
+ int num_records = 2;
+ auto validity_array = {false, true};
+
+ auto array_int32 =
+ MakeArrowArrayInt32({1, 0}, validity_array);
+
+ auto array_int64 =
+ MakeArrowArrayInt64({1, 0}, validity_array);
+
+ auto array_float32 =
+ MakeArrowArrayFloat32({1.0, 0.0}, validity_array);
+
+ auto array_float64 =
+ MakeArrowArrayFloat64({1.0, 0.0}, validity_array);
+
+ // prepare input record batch
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records,
+
{array_int32, array_int64,
+ array_float32, array_float64});
+
+ // Evaluate expression
+ arrow::ArrayVector outputs;
+ status = projector->Evaluate(*in_batch, pool_, &outputs);
+ EXPECT_TRUE(status.ok());
+
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(0), outputs.at(1));
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(1), outputs.at(2));
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(2), outputs.at(3));
+}
+
+TEST_F(TestHash, TestSha128Varlen) {
+ // schema for input fields
+ auto field_a = field("a", utf8());
+ auto schema = arrow::schema({field_a});
+
+ // output fields
+ auto res_0 = field("res0", utf8());
+
+ // build expressions.
+ // hashSHA128(a)
+ auto node_a = TreeExprBuilder::MakeField(field_a);
+ auto hashSha128 = TreeExprBuilder::MakeFunction("hashSHA128",
+
{node_a}, utf8());
+ auto expr_0 = TreeExprBuilder::MakeExpression(hashSha128, res_0);
+
+ // Build a projector for the expressions.
+ std::shared_ptr<Projector> projector;
+ auto status =
+ Projector::Make(schema, {expr_0}, TestConfiguration(), &projector);
+ EXPECT_TRUE(status.ok()) << status.message();
+
+ // Create a row-batch with some sample data
+ int num_records = 3;
+
+ std::string first_string = "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], "
+ "Yen [jɛn], Yoga [ˈjoːgɑ]";
+ std::string second_string = "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], "
+ "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
+
+ auto array_a =
+ MakeArrowArrayUtf8({"foo", first_string, second_string},
+ {false, true, true});
+
+ // prepare input record batch
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
+
+ // Evaluate expression
+ arrow::ArrayVector outputs;
+ status = projector->Evaluate(*in_batch, pool_, &outputs);
+ EXPECT_TRUE(status.ok());
+
+ auto response = outputs.at(0);
+ EXPECT_EQ(response->null_count(), 0);
+ EXPECT_EQ(response->GetScalar(0).ValueOrDie()->ToString(), "");
+ for (int i = 1; i < num_records; ++i) {
+ const auto &value_at_position =
response->GetScalar(i).ValueOrDie()->ToString();
+ EXPECT_NE(value_at_position,
+ response->GetScalar(i -
1).ValueOrDie()->ToString());
+ }
Review comment:
Added the asserts for the hash size in the integration tests.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org