projjal commented on a change in pull request #9707: URL: https://github.com/apache/arrow/pull/9707#discussion_r601006184
########## File path: cpp/src/gandiva/tests/hash_test.cc ########## @@ -147,4 +149,271 @@ TEST_F(TestHash, TestBuf) { } } +TEST_F(TestHash, TestSha256Simple) { + // schema for input fields + auto field_a = field("a", int32()); + auto field_b = field("b", int64()); + auto field_c = field("c", float32()); + auto field_d = field("d", float64()); + auto schema = arrow::schema({field_a, field_b, field_c, field_d}); + + // output fields + auto res_0 = field("res0", utf8()); + auto res_1 = field("res1", utf8()); + auto res_2 = field("res2", utf8()); + auto res_3 = field("res3", utf8()); + + // build expressions. + // hashSHA256(a) + auto node_a = TreeExprBuilder::MakeField(field_a); + auto hashSha256_1 = TreeExprBuilder::MakeFunction("hashSHA256", {node_a}, utf8()); + auto expr_0 = TreeExprBuilder::MakeExpression(hashSha256_1, res_0); + + auto node_b = TreeExprBuilder::MakeField(field_b); + auto hashSha256_2 = TreeExprBuilder::MakeFunction("hashSHA256", {node_b}, utf8()); + auto expr_1 = TreeExprBuilder::MakeExpression(hashSha256_2, res_1); + + auto node_c = TreeExprBuilder::MakeField(field_c); + auto hashSha256_3 = TreeExprBuilder::MakeFunction("hashSHA256", {node_c}, utf8()); + auto expr_2 = TreeExprBuilder::MakeExpression(hashSha256_3, res_2); + + auto node_d = TreeExprBuilder::MakeField(field_d); + auto hashSha256_4 = TreeExprBuilder::MakeFunction("hashSHA256", {node_d}, utf8()); + auto expr_3 = TreeExprBuilder::MakeExpression(hashSha256_4, res_3); + + // Build a projector for the expressions. 
+ std::shared_ptr<Projector> projector; + auto status = Projector::Make(schema, {expr_0, expr_1, expr_2, expr_3}, + TestConfiguration(), &projector); + ASSERT_OK(status) << status.message(); + + // Create a row-batch with some sample data + int num_records = 2; + auto validity_array = {false, true}; + + auto array_int32 = MakeArrowArrayInt32({1, 0}, validity_array); + + auto array_int64 = MakeArrowArrayInt64({1, 0}, validity_array); + + auto array_float32 = MakeArrowArrayFloat32({1.0, 0.0}, validity_array); + + auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0}, validity_array); + + // prepare input record batch + auto in_batch = arrow::RecordBatch::Make( + schema, num_records, {array_int32, array_int64, array_float32, array_float64}); + + // Evaluate expression + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + ASSERT_OK(status); + + auto response_int32 = outputs.at(0); + auto response_int64 = outputs.at(1); + auto response_float32 = outputs.at(2); + auto response_float64 = outputs.at(3); + + // Checks if the null and zero representation for numeric values + // are consistent between the types + EXPECT_ARROW_ARRAY_EQUALS(response_int32, response_int64); + EXPECT_ARROW_ARRAY_EQUALS(response_int64, response_float32); + EXPECT_ARROW_ARRAY_EQUALS(response_float32, response_float64); + + const int sha256_hash_size = 64; + + // Checks if the hash size in response is correct + for (int i = 1; i < num_records; ++i) { + const auto& value_at_position = response_int32->GetScalar(i).ValueOrDie()->ToString(); + + EXPECT_EQ(value_at_position.size(), sha256_hash_size); + EXPECT_NE(value_at_position, + response_int32->GetScalar(i - 1).ValueOrDie()->ToString()); + } +} + +TEST_F(TestHash, TestSha256Varlen) { + // schema for input fields + auto field_a = field("a", utf8()); + auto schema = arrow::schema({field_a}); + + // output fields + auto res_0 = field("res0", utf8()); + + // build expressions. 
+ // hashSHA256(a) + auto node_a = TreeExprBuilder::MakeField(field_a); + auto hashSha256 = TreeExprBuilder::MakeFunction("hashSHA256", {node_a}, utf8()); + auto expr_0 = TreeExprBuilder::MakeExpression(hashSha256, res_0); + + // Build a projector for the expressions. + std::shared_ptr<Projector> projector; + auto status = Projector::Make(schema, {expr_0}, TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()) << status.message(); + + // Create a row-batch with some sample data + int num_records = 3; + + std::string first_string = + "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY " + "[ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]"; + std::string second_string = + "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY " + "[ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ"; + + auto array_a = + MakeArrowArrayUtf8({"foo", first_string, second_string}, {false, true, true}); + + // prepare input record batch + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a}); + + // Evaluate expression + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + ASSERT_OK(status); + + auto response = outputs.at(0); + const int sha256_hash_size = 64; + + EXPECT_EQ(response->null_count(), 0); + for (int i = 1; i < num_records; ++i) { + const auto& value_at_position = response->GetScalar(i).ValueOrDie()->ToString(); + + EXPECT_EQ(value_at_position.size(), sha256_hash_size); + EXPECT_NE(value_at_position, response->GetScalar(i - 1).ValueOrDie()->ToString()); + } +} + +TEST_F(TestHash, TestSha1Simple) { + // schema for input fields + auto field_a = field("a", int32()); + auto field_b = field("b", int64()); + auto field_c = field("c", float32()); + auto field_d = field("d", float64()); + auto schema = arrow::schema({field_a, field_b, field_c, field_d}); + + // output fields + auto res_0 = field("res0", utf8()); + auto res_1 = field("res1", utf8()); + auto res_2 = field("res2", utf8()); + auto res_3 = field("res3", utf8()); + + // build expressions. 
+ // hashSHA1(a) + auto node_a = TreeExprBuilder::MakeField(field_a); + auto hashSha1_1 = TreeExprBuilder::MakeFunction("hashSHA1", {node_a}, utf8()); + auto expr_0 = TreeExprBuilder::MakeExpression(hashSha1_1, res_0); + + auto node_b = TreeExprBuilder::MakeField(field_b); + auto hashSha1_2 = TreeExprBuilder::MakeFunction("hashSHA1", {node_b}, utf8()); + auto expr_1 = TreeExprBuilder::MakeExpression(hashSha1_2, res_1); + + auto node_c = TreeExprBuilder::MakeField(field_c); + auto hashSha1_3 = TreeExprBuilder::MakeFunction("hashSHA1", {node_c}, utf8()); + auto expr_2 = TreeExprBuilder::MakeExpression(hashSha1_3, res_2); + + auto node_d = TreeExprBuilder::MakeField(field_d); + auto hashSha1_4 = TreeExprBuilder::MakeFunction("hashSHA1", {node_d}, utf8()); + auto expr_3 = TreeExprBuilder::MakeExpression(hashSha1_4, res_3); + + // Build a projector for the expressions. + std::shared_ptr<Projector> projector; + auto status = Projector::Make(schema, {expr_0, expr_1, expr_2, expr_3}, + TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()) << status.message(); + + // Create a row-batch with some sample data + int num_records = 2; + auto validity_array = {false, true}; + + auto array_int32 = MakeArrowArrayInt32({1, 0}, validity_array); + + auto array_int64 = MakeArrowArrayInt64({1, 0}, validity_array); + + auto array_float32 = MakeArrowArrayFloat32({1.0, 0.0}, validity_array); + + auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0}, validity_array); + + // prepare input record batch + auto in_batch = arrow::RecordBatch::Make( + schema, num_records, {array_int32, array_int64, array_float32, array_float64}); + + // Evaluate expression + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + ASSERT_OK(status); + + auto response_int32 = outputs.at(0); + auto response_int64 = outputs.at(1); + auto response_float32 = outputs.at(2); + auto response_float64 = outputs.at(3); + + // Checks if the null and zero representation for numeric 
values + // are consistent between the types + EXPECT_ARROW_ARRAY_EQUALS(response_int32, response_int64); + EXPECT_ARROW_ARRAY_EQUALS(response_int64, response_float32); + EXPECT_ARROW_ARRAY_EQUALS(response_float32, response_float64); + + const int sha1_hash_size = 40; + + // Checks if the hash size in response is correct + for (int i = 1; i < num_records; ++i) { + const auto& value_at_position = response_int32->GetScalar(i).ValueOrDie()->ToString(); + + EXPECT_EQ(value_at_position.size(), sha1_hash_size); + EXPECT_NE(value_at_position, + response_int32->GetScalar(i - 1).ValueOrDie()->ToString()); + } +} + +TEST_F(TestHash, TestSha1Varlen) { + // schema for input fields + auto field_a = field("a", utf8()); + auto schema = arrow::schema({field_a}); + + // output fields + auto res_0 = field("res0", utf8()); + + // build expressions. + // hashSHA1(a) + auto node_a = TreeExprBuilder::MakeField(field_a); + auto hashSha1 = TreeExprBuilder::MakeFunction("hashSHA1", {node_a}, utf8()); + auto expr_0 = TreeExprBuilder::MakeExpression(hashSha1, res_0); + + // Build a projector for the expressions. 
+ std::shared_ptr<Projector> projector; + auto status = Projector::Make(schema, {expr_0}, TestConfiguration(), &projector); + ASSERT_OK(status) << status.message(); + + // Create a row-batch with some sample data + int num_records = 3; + + std::string first_string = + "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], " + "Yen [jɛn], Yoga [ˈjoːgɑ]"; + std::string second_string = + "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], " + "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ"; + + auto array_a = + MakeArrowArrayUtf8({"", first_string, second_string}, {false, true, true}); + + // prepare input record batch + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a}); + + // Evaluate expression + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + ASSERT_OK(status); + + auto response = outputs.at(0); + const int sha1_hash_size = 40; + + EXPECT_EQ(response->null_count(), 0); + for (int i = 1; i < num_records; ++i) { Review comment: Can you also add an assertion for the first record, which is null? A test case for the hash of a null value is missing. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org