bkmgit commented on a change in pull request #11882:
URL: https://github.com/apache/arrow/pull/11882#discussion_r767505110
##########
File path: cpp/src/arrow/compute/kernels/scalar_compare_test.cc
##########
@@ -1850,5 +1851,154 @@ TEST(TestMaxElementWiseMinElementWise, CommonTemporal) {
ResultWith(ScalarFromJSON(date64(), "86400000")));
}
+template <typename ArrowType>
+static void ValidateBetween(const Datum& val, const Datum& lhs, const Datum&
rhs,
+ const Datum& expected) {
+ ASSERT_OK_AND_ASSIGN(Datum result, Between(val, lhs, rhs));
+ AssertArraysEqual(*expected.make_array(), *result.make_array(),
+ /*verbose=*/true);
+}
+
+template <typename ArrowType>
+static void ValidateBetween(const char* value_str, const Datum& lhs, const
Datum& rhs,
+ const char* expected_str) {
+ auto value = ArrayFromJSON(TypeTraits<ArrowType>::type_singleton(),
value_str);
+ auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(),
expected_str);
+ ValidateBetween<ArrowType>(value, lhs, rhs, expected);
+}
+
+template <>
+void ValidateBetween<StringType>(const char* value_str, const Datum& lhs,
+ const Datum& rhs, const char* expected_str) {
+ auto value = ArrayFromJSON(utf8(), value_str);
+ auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(),
expected_str);
+ ValidateBetween<StringType>(value, lhs, rhs, expected);
+}
+
+template <typename ArrowType>
+class TestNumericBetweenKernel : public ::testing::Test {};
+
+TYPED_TEST_SUITE(TestNumericBetweenKernel, NumericArrowTypes);
+TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayScalarScalar) {
+ using ScalarType = typename TypeTraits<TypeParam>::ScalarType;
+ using CType = typename TypeTraits<TypeParam>::CType;
+
+ Datum zero(std::make_shared<ScalarType>(CType(0)));
+ Datum four(std::make_shared<ScalarType>(CType(4)));
+ ValidateBetween<TypeParam>("[]", zero, four, "[]");
+ ValidateBetween<TypeParam>("[null]", zero, four, "[null]");
+ ValidateBetween<TypeParam>("[0,0,1,1,2,2]", zero, four, "[0,0,1,1,1,1]");
+ ValidateBetween<TypeParam>("[0,1,2,3,4,5]", zero, four, "[0,1,1,1,0,0]");
+ ValidateBetween<TypeParam>("[5,4,3,2,1,0]", zero, four, "[0,0,1,1,1,0]");
+ ValidateBetween<TypeParam>("[null,0,1,1]", zero, four, "[null,0,1,1]");
+}
+
+TEST(TestSimpleBetweenKernel, SimpleStringTest) {
+ using ScalarType = typename TypeTraits<StringType>::ScalarType;
+ auto l = Datum(std::make_shared<ScalarType>("abc"));
+ auto r = Datum(std::make_shared<ScalarType>("zzz"));
+ ValidateBetween<StringType>("[]", l, r, "[]");
+ ValidateBetween<StringType>("[null]", l, r, "[null]");
+ ValidateBetween<StringType>(R"(["aaa", "aaaa", "ccc", "z"])", l, r,
+ R"([false, false, true, true])");
+ ValidateBetween<StringType>(R"(["a", "aaaa", "c", "z"])", l, r,
+ R"([false, false, true, true])");
+ ValidateBetween<StringType>(R"(["a", "aaaa", "fff", "zzzz"])", l, r,
+ R"([false, false, true, false])");
+ ValidateBetween<StringType>(R"(["abd", null, null, "zzx"])", l, r,
+ R"([true, null, null, true])");
+}
+
+TEST(TestSimpleBetweenKernel, SimpleTimestampTest) {
+ using ScalarType = typename TypeTraits<TimestampType>::ScalarType;
+ auto checkTimestampArray = [](std::shared_ptr<DataType> type, const char*
input_str,
+ const Datum& lhs, const Datum& rhs,
+ const char* expected_str) {
+ auto value = ArrayFromJSON(type, input_str);
+ auto expected = ArrayFromJSON(boolean(), expected_str);
+ ValidateBetween<TimestampType>(value, lhs, rhs, expected);
+ };
+ auto unit = TimeUnit::SECOND;
+ auto l = Datum(std::make_shared<ScalarType>(923184000, timestamp(unit)));
+ auto r = Datum(std::make_shared<ScalarType>(1602032602, timestamp(unit)));
+ checkTimestampArray(timestamp(unit), "[]", l, r, "[]");
+ checkTimestampArray(timestamp(unit), "[null]", l, r, "[null]");
+ checkTimestampArray(timestamp(unit),
R"(["1970-01-01","2000-02-29","1900-02-28"])", l,
+ r, "[false,true,false]");
+ checkTimestampArray(timestamp(unit),
R"(["1970-01-01","2000-02-29","2004-02-28"])", l,
+ r, "[false,true,true]");
+ checkTimestampArray(timestamp(unit),
R"(["2018-01-01","1999-04-04","1900-02-28"])", l,
+ r, "[true,false,false]");
+}
+
+TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayArrayArray) {
+ ValidateBetween<TypeParam>(
+ "[]", ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[]"),
+ ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[]"), "[]");
+ ValidateBetween<TypeParam>(
+ "[null]", ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(),
"[null]"),
+ ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[null]"),
"[null]");
+ ValidateBetween<TypeParam>(
+ "[1,1,2,2,2]",
+ ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[0,0,1,3,3]"),
+ ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[10,10,2,5,5]"),
+ "[true,true,false,false,false]");
+ ValidateBetween<TypeParam>(
+ "[1,1,2,2,2,2]",
+ ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(),
"[0,0,1,null,3,3]"),
+ ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(),
"[10,10,2,2,5,5]"),
+ "[true,true,false,null,false,false]");
+}
+
+TEST(TestSimpleBetweenKernel, StringArrayArrayArrayTest) {
+ ValidateBetween<StringType>(
+ R"(["david","hello","world"])",
+ ArrayFromJSON(TypeTraits<StringType>::type_singleton(),
R"(["adam","hi","whirl"])"),
+ ArrayFromJSON(TypeTraits<StringType>::type_singleton(),
+ R"(["robert","goeiemoreen","whirlwind"])"),
+ "[true, false, false]");
+ ValidateBetween<StringType>(
+ R"(["x","a","f"])",
+ ArrayFromJSON(TypeTraits<StringType>::type_singleton(),
R"(["w","a","e"])"),
+ ArrayFromJSON(TypeTraits<StringType>::type_singleton(),
R"(["z","a","g"])"),
+ "[true, false, true]");
+ ValidateBetween<StringType>(
+ R"(["block","bit","binary"])",
+ ArrayFromJSON(TypeTraits<StringType>::type_singleton(),
+ R"(["bit","nibble","ternary"])"),
+ ArrayFromJSON(TypeTraits<StringType>::type_singleton(),
R"(["word","d","xyz"])"),
+ "[true, false, false]");
+ ValidateBetween<StringType>(R"(["Ayumi","アユミ","王梦莹"])",
+
ArrayFromJSON(TypeTraits<StringType>::type_singleton(),
+ R"(["たなか","あゆみ","歩美"])"),
+
ArrayFromJSON(TypeTraits<StringType>::type_singleton(),
+ R"(["李平之","田中","たなか"])"),
+ "[false, true, false]");
Review comment:
We can use Arabic and Korean as well. The UTF-8 ordering of Japanese and
Mandarin characters is rarely useful in practice, because it is not intuitive
without context, as explained in
http://www.localizingjapan.com/blog/2011/02/13/ - the test therefore only
checks that one gets Unicode ordering, which may not make sense in many cases.
Some Mandarin characters are also used as Kanji, but because Unicode requires
one code point per character, ordering will be problematic. Even languages
that use the Latin alphabet can order accented characters such as Ä, Ö, and Ü
differently. An external function and/or an alternative implementation is
probably needed for strings, and this is something to consider for future
versions of the Arrow specification. For Japanese, we could also use Kana,
which has [Gojūon](https://en.wikipedia.org/wiki/Goj%C5%ABon) ordering, so
that the test makes sense, if this is needed. We can leave out the recent
Unicode extensions described at <https://en.wikipedia.org/wiki/Kana>. We can
raise a separate issue for an Arrow extension that is able to sort text.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]