lidavidm commented on a change in pull request #12162:
URL: https://github.com/apache/arrow/pull/12162#discussion_r793703981



##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -428,6 +428,273 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a 
StructArray",
                                    "specified through MakeStructOptions."),
                                   {"*args"},
                                   "MakeStructOptions"};
+template <typename KeyType>
+struct MapArrayLookupFunctor {
+  static Result<int64_t> GetOneMatchingIndex(const Array& keys,
+                                             const Scalar& query_key_scalar,
+                                             const bool& from_back) {
+    int64_t match_index = -1;
+    RETURN_NOT_OK(
+        FindMatchingIndices(keys, query_key_scalar, [&](int64_t index) -> 
Status {
+          match_index = index;
+          if (from_back) {
+            return Status::OK();
+          } else {
+            return Status::Cancelled("Found key match for FIRST");
+          }
+        }));
+
+    return match_index;
+  }
+
+  static Status BuildListOfItemsArray(const Array& keys, const Array& items,
+                                      const Scalar& query_key_scalar,
+                                      bool& found_at_least_one_key,
+                                      ArrayBuilder* builder) {
+    RETURN_NOT_OK(
+        FindMatchingIndices(keys, query_key_scalar, [&](int64_t index) -> 
Status {
+          found_at_least_one_key = true;
+          RETURN_NOT_OK(builder->AppendArraySlice(*items.data(), index, 1));
+          return Status::OK();
+        }));
+    return Status::OK();
+  }
+
+  template <typename FoundItem>
+  static Status FindMatchingIndices(const Array& keys, const Scalar& 
query_key_scalar,
+                                    FoundItem callback) {
+    const auto query_key = UnboxScalar<KeyType>::Unbox(query_key_scalar);
+    int64_t index = 0;
+    ARROW_UNUSED(VisitArrayValuesInline<KeyType>(
+        *keys.data(),
+        [&](decltype(query_key) key) -> Status {
+          Status to_return = Status::OK();
+          if (key == query_key) {
+            to_return = callback(index);
+          }
+          ++index;
+          return to_return;

Review comment:
       ```suggestion
             if (key == query_key) {
               return callback(index++);
             }
             ++index;
             return Status::OK();
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -225,6 +225,423 @@ TEST(TestScalarNested, StructField) {
   }
 }
 
+void CheckMapArrayLookupWithDifferentOptions(
+    const std::shared_ptr<Array>& map, const std::shared_ptr<Scalar>& 
query_key,
+    const std::shared_ptr<Array>& expected_all,
+    const std::shared_ptr<Array>& expected_first,
+    const std::shared_ptr<Array>& expected_last) {
+  MapArrayLookupOptions all_matches(query_key, MapArrayLookupOptions::ALL);
+  MapArrayLookupOptions first_matches(query_key, MapArrayLookupOptions::FIRST);
+  MapArrayLookupOptions last_matches(query_key, MapArrayLookupOptions::LAST);
+
+  CheckScalar("map_array_lookup", {map}, expected_all, &all_matches);
+  CheckScalar("map_array_lookup", {map}, expected_first, &first_matches);
+  CheckScalar("map_array_lookup", {map}, expected_last, &last_matches);
+}
+
+class TestMapArrayLookupKernel : public ::testing::Test {};
+
+TEST_F(TestMapArrayLookupKernel, Basic) {
+  auto type = map(utf8(), int32());
+  const char* input = R"(
+    [
+      [["foo", 99], ["bar", 1], ["hello", 2], ["foo", 3], ["lets go", 5], 
["what now?", 8]],
+      null,
+      [["nothing", null], ["hat", null], ["foo", 101], ["sorry", 1], ["dip", 
null],
+      ["foo", 22]],
+      []
+    ])";
+  auto map_array = ArrayFromJSON(type, input);
+
+  CheckMapArrayLookupWithDifferentOptions(
+      map_array, MakeScalar("foo"),
+      ArrayFromJSON(list(int32()), R"([[99, 3], null, [101, 22], null])"),
+      ArrayFromJSON(int32(), R"([99, null, 101, null])"),
+      ArrayFromJSON(int32(), R"([3, null, 22, null])"));
+}
+
+TEST_F(TestMapArrayLookupKernel, NestedItems) {
+  auto type = map(utf8(), map(int16(), int16()));
+  const char* input = R"(
+    [
+      [
+        [
+          "just",
+          [[0, 0], [1, 1]]
+        ],
+        [
+          "random",
+          [[2, 2], [3, 3]]
+        ],
+        [
+          "foo",
+          [[4, 4], [5, 5]]
+        ],
+        [
+          "values",
+          [[6, 6], [7, 7]]
+        ],
+        [
+          "foo",
+          [[8, 8], [9, 9]]
+        ],
+        [
+          "point",
+          [[10, 10], [11, 11]]
+        ],
+        [
+          "foo",
+          [[12, 12], [13, 13]]
+        ]
+      ],
+      null,
+      [
+        [
+          "yet",
+          [[0, 1], [1, 2]]
+        ],
+        [
+          "more",
+          [[2, 3], [3, 4]]
+        ],
+        [
+          "foo",
+          [[4, 5], [5, 6]]
+        ],
+        [
+          "random",
+          [[6, 7], [7, 8]]
+        ],
+        [
+          "foo",
+          [[8, 9], [9, 10]]
+        ],
+        [
+          "values",
+          [[10, 11], [11, 12]]
+        ],
+        [
+          "foo",
+          [[12, 13], [13, 14]]
+        ]
+      ],
+      []
+    ]
+  )";
+  const auto map_array = ArrayFromJSON(type, input);
+
+  const auto expected_all = ArrayFromJSON(list(map(int16(), int16())), R"(
+                                [
+                                  [
+                                    [[4, 4], [5, 5]], [[8, 8], [9, 9]],
+                                    [[12, 12], [13, 13]]
+                                  ],
+                                  null,
+                                  [
+                                    [[4, 5], [5, 6]], [[8, 9], [9, 10]],
+                                    [[12, 13], [13, 14]]
+                                  ],
+                                  null
+                                ])");
+  const auto expected_first = ArrayFromJSON(map(int16(), int16()), R"(
+                                [
+                                  [[4, 4], [5, 5]],
+                                  null,
+                                  [[4, 5], [5, 6]],
+                                  null
+                                ])");
+  const auto expected_last = ArrayFromJSON(map(int16(), int16()), R"(
+                                [
+                                  [[12, 12], [13, 13]],
+                                  null,
+                                  [[12, 13], [13, 14]],
+                                  null
+                                ])");
+
+  CheckMapArrayLookupWithDifferentOptions(map_array, MakeScalar("foo"), 
expected_all,
+                                          expected_first, expected_last);
+}
+
+TEST_F(TestMapArrayLookupKernel, BooleanKey) {
+  auto true_scalar = ScalarFromJSON(boolean(), R"(true)");
+  auto map_type = map(boolean(), int32());
+  const char* input = R"(
+    [
+      [
+        [true, 99], [false, 1], [false, 2], [true, null], [false, 5],
+        [true, 8]
+      ],
+      null,
+      [
+        [false, null], [true, 67], [false, 101], [false, 1], [false, null],
+        [false, 9], [true, 80]
+      ],
+      [],
+      [
+        [false, 1], [false, 2], [false, 3], [false, 4]
+      ],
+      [
+        [true, 9], [true, 2], [true, 5], [true, 8]
+      ]
+    ]
+  )";
+  auto map_array = ArrayFromJSON(map_type, input);
+  auto map_array_tweaked = TweakValidityBit(map_array, 5, false);
+
+  auto expected_all = ArrayFromJSON(list(int32()), R"(
+    [[99, null, 8], null, [67, 80], null, null, null ])");
+  auto expected_first = ArrayFromJSON(int32(), "[99, null, 67, null, null, 
null]");
+  auto expected_last = ArrayFromJSON(int32(), "[8, null, 80, null, null, 
null]");
+
+  CheckMapArrayLookupWithDifferentOptions(map_array_tweaked, true_scalar, 
expected_all,
+                                          expected_first, expected_last);
+}
+
+TEST_F(TestMapArrayLookupKernel, MonthDayNanoIntervalKeys) {
+  auto key_type = month_day_nano_interval();
+  auto map_type = map(key_type, utf8());
+  auto key_scalar = ScalarFromJSON(month_day_nano_interval(), R"([1, 2, -3])");
+  const char* input = R"(
+    [
+      [
+        [[-9, -10, 11], "zero"], [[1, 2, -3], "first_one"], [[11, -12, 0], 
"two"],
+        [[1, 2, -3], null], [[-7, -8, -9], "three"], [[1, 2, -3], 
"second_one"],
+        [[1, 2, -3], "last_one"]
+      ],
+      null,
+      [
+        [[-5, 6, 7], "zero_hero"], [[15, 16, 2], "almost_six"],
+        [[1, 2, -3], "the_dumb_one"], [[-7, -8, -9], "eleven"],
+        [[1, 2, -3], "the_chosen_one"], [[-5, 6, 7], "meaning of life"],
+        [[1, 2, -3], "just_one"], [[1, 2, -3], "no more ones!"]
+      ],
+      [
+        [[-5, 6, 7], "this"], [[-13, 14, -1], "has"], [[11, -12, 0], "no"],
+        [[15, 16, 2], "keys"]
+      ],
+      [
+        [[1, 2, -3], "this"], [[1, 2, -3], "should"], [[1, 2, -3], "also"],
+        [[1, 2, -3], "be"], [[1, 2, -3], "null"]
+      ],
+      []
+    ]
+  )";
+  auto map_array = ArrayFromJSON(map_type, input);
+  auto map_array_tweaked = TweakValidityBit(map_array, 4, false);
+
+  auto expected_first =
+      ArrayFromJSON(utf8(), R"(["first_one", null, "the_dumb_one", null, null, 
null])");
+  auto expected_last =
+      ArrayFromJSON(utf8(), R"(["last_one", null, "no more ones!", null, null, 
null])");
+  auto expected_all = ArrayFromJSON(list(utf8()),
+                                    R"([
+                                          ["first_one", null, "second_one", 
"last_one"],
+                                          null,
+                                          ["the_dumb_one", "the_chosen_one", 
"just_one", "no more ones!"],
+                                          null,
+                                          null,
+                                          null
+                                        ]
+                                      )");
+
+  CheckMapArrayLookupWithDifferentOptions(map_array_tweaked, key_scalar, 
expected_all,
+                                          expected_first, expected_last);
+}

Review comment:
       Eventually we should add a `TEST(TestMapArrayLookupKernel, Errors)` 
where we check error conditions like not providing a scalar or providing a 
scalar of the wrong type. (`ASSERT_RAISES(TypeError, MapArrayLookup(...));`)

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -225,6 +225,423 @@ TEST(TestScalarNested, StructField) {
   }
 }
 
+void CheckMapArrayLookupWithDifferentOptions(
+    const std::shared_ptr<Array>& map, const std::shared_ptr<Scalar>& 
query_key,
+    const std::shared_ptr<Array>& expected_all,
+    const std::shared_ptr<Array>& expected_first,
+    const std::shared_ptr<Array>& expected_last) {
+  MapArrayLookupOptions all_matches(query_key, MapArrayLookupOptions::ALL);
+  MapArrayLookupOptions first_matches(query_key, MapArrayLookupOptions::FIRST);
+  MapArrayLookupOptions last_matches(query_key, MapArrayLookupOptions::LAST);
+
+  CheckScalar("map_array_lookup", {map}, expected_all, &all_matches);
+  CheckScalar("map_array_lookup", {map}, expected_first, &first_matches);
+  CheckScalar("map_array_lookup", {map}, expected_last, &last_matches);
+}
+
+class TestMapArrayLookupKernel : public ::testing::Test {};
+
+TEST_F(TestMapArrayLookupKernel, Basic) {
+  auto type = map(utf8(), int32());
+  const char* input = R"(
+    [
+      [["foo", 99], ["bar", 1], ["hello", 2], ["foo", 3], ["lets go", 5], 
["what now?", 8]],
+      null,
+      [["nothing", null], ["hat", null], ["foo", 101], ["sorry", 1], ["dip", 
null],
+      ["foo", 22]],
+      []
+    ])";
+  auto map_array = ArrayFromJSON(type, input);
+
+  CheckMapArrayLookupWithDifferentOptions(
+      map_array, MakeScalar("foo"),
+      ArrayFromJSON(list(int32()), R"([[99, 3], null, [101, 22], null])"),
+      ArrayFromJSON(int32(), R"([99, null, 101, null])"),
+      ArrayFromJSON(int32(), R"([3, null, 22, null])"));
+}
+
+TEST_F(TestMapArrayLookupKernel, NestedItems) {
+  auto type = map(utf8(), map(int16(), int16()));
+  const char* input = R"(
+    [
+      [
+        [
+          "just",
+          [[0, 0], [1, 1]]
+        ],
+        [
+          "random",
+          [[2, 2], [3, 3]]
+        ],
+        [
+          "foo",
+          [[4, 4], [5, 5]]
+        ],
+        [
+          "values",
+          [[6, 6], [7, 7]]
+        ],
+        [
+          "foo",
+          [[8, 8], [9, 9]]
+        ],
+        [
+          "point",
+          [[10, 10], [11, 11]]
+        ],
+        [
+          "foo",
+          [[12, 12], [13, 13]]
+        ]
+      ],
+      null,
+      [
+        [
+          "yet",
+          [[0, 1], [1, 2]]
+        ],
+        [
+          "more",
+          [[2, 3], [3, 4]]
+        ],
+        [
+          "foo",
+          [[4, 5], [5, 6]]
+        ],
+        [
+          "random",
+          [[6, 7], [7, 8]]
+        ],
+        [
+          "foo",
+          [[8, 9], [9, 10]]
+        ],
+        [
+          "values",
+          [[10, 11], [11, 12]]
+        ],
+        [
+          "foo",
+          [[12, 13], [13, 14]]
+        ]
+      ],
+      []
+    ]
+  )";
+  const auto map_array = ArrayFromJSON(type, input);
+
+  const auto expected_all = ArrayFromJSON(list(map(int16(), int16())), R"(
+                                [
+                                  [
+                                    [[4, 4], [5, 5]], [[8, 8], [9, 9]],
+                                    [[12, 12], [13, 13]]
+                                  ],
+                                  null,
+                                  [
+                                    [[4, 5], [5, 6]], [[8, 9], [9, 10]],
+                                    [[12, 13], [13, 14]]
+                                  ],
+                                  null
+                                ])");
+  const auto expected_first = ArrayFromJSON(map(int16(), int16()), R"(
+                                [
+                                  [[4, 4], [5, 5]],
+                                  null,
+                                  [[4, 5], [5, 6]],
+                                  null
+                                ])");
+  const auto expected_last = ArrayFromJSON(map(int16(), int16()), R"(
+                                [
+                                  [[12, 12], [13, 13]],
+                                  null,
+                                  [[12, 13], [13, 14]],
+                                  null
+                                ])");
+
+  CheckMapArrayLookupWithDifferentOptions(map_array, MakeScalar("foo"), 
expected_all,
+                                          expected_first, expected_last);
+}
+
+TEST_F(TestMapArrayLookupKernel, BooleanKey) {
+  auto true_scalar = ScalarFromJSON(boolean(), R"(true)");
+  auto map_type = map(boolean(), int32());
+  const char* input = R"(
+    [
+      [
+        [true, 99], [false, 1], [false, 2], [true, null], [false, 5],
+        [true, 8]
+      ],
+      null,
+      [
+        [false, null], [true, 67], [false, 101], [false, 1], [false, null],
+        [false, 9], [true, 80]
+      ],
+      [],
+      [
+        [false, 1], [false, 2], [false, 3], [false, 4]
+      ],
+      [
+        [true, 9], [true, 2], [true, 5], [true, 8]

Review comment:
       In general: can we add some `null` keys in addition to `true` and 
`false`? (Same below.)

##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -1350,5 +1375,20 @@ ARROW_EXPORT Result<Datum> AssumeTimezone(const Datum& 
values,
                                           AssumeTimezoneOptions options,
                                           ExecContext* ctx = NULLPTR);
 
+/// \brief Finds either the FIRST, LAST, or ALL items with a key that matches 
the given
+/// query key in a map array.
+///
+/// Returns an array of items for FIRST and LAST, and an array of list of 
items for ALL.
+///
+/// \param[in] map_array to look in
+/// \param[in] options to pass a query key and choose which matching keys to 
return
+/// (FIRST, LAST or ALL)
+/// \param[in] ctx the function execution context, optional
+///
+/// \return the resulting datum
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> MapArrayLookup(const Datum& map_array,
+                                          MapArrayLookupOptions options,
+                                          ExecContext* ctx = NULLPTR);

Review comment:
       Yes, this is fine.

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -428,6 +428,273 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a 
StructArray",
                                    "specified through MakeStructOptions."),
                                   {"*args"},
                                   "MakeStructOptions"};
+template <typename KeyType>
+struct MapArrayLookupFunctor {
+  static Result<int64_t> GetOneMatchingIndex(const Array& keys,
+                                             const Scalar& query_key_scalar,
+                                             const bool& from_back) {
+    int64_t match_index = -1;
+    RETURN_NOT_OK(
+        FindMatchingIndices(keys, query_key_scalar, [&](int64_t index) -> 
Status {
+          match_index = index;
+          if (from_back) {
+            return Status::OK();
+          } else {
+            return Status::Cancelled("Found key match for FIRST");
+          }
+        }));
+
+    return match_index;
+  }
+
+  static Status BuildListOfItemsArray(const Array& keys, const Array& items,
+                                      const Scalar& query_key_scalar,
+                                      bool& found_at_least_one_key,
+                                      ArrayBuilder* builder) {
+    RETURN_NOT_OK(
+        FindMatchingIndices(keys, query_key_scalar, [&](int64_t index) -> 
Status {
+          found_at_least_one_key = true;
+          RETURN_NOT_OK(builder->AppendArraySlice(*items.data(), index, 1));
+          return Status::OK();
+        }));
+    return Status::OK();
+  }
+
+  template <typename FoundItem>
+  static Status FindMatchingIndices(const Array& keys, const Scalar& 
query_key_scalar,
+                                    FoundItem callback) {
+    const auto query_key = UnboxScalar<KeyType>::Unbox(query_key_scalar);
+    int64_t index = 0;
+    ARROW_UNUSED(VisitArrayValuesInline<KeyType>(
+        *keys.data(),
+        [&](decltype(query_key) key) -> Status {
+          Status to_return = Status::OK();
+          if (key == query_key) {
+            to_return = callback(index);
+          }
+          ++index;
+          return to_return;

Review comment:
       mostly a nit, just to slim things a little

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -428,6 +428,273 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a 
StructArray",
                                    "specified through MakeStructOptions."),
                                   {"*args"},
                                   "MakeStructOptions"};
+template <typename KeyType>
+struct MapArrayLookupFunctor {
+  static Result<int64_t> GetOneMatchingIndex(const Array& keys,
+                                             const Scalar& query_key_scalar,
+                                             const bool& from_back) {

Review comment:
       Sure, this works.

##########
File path: cpp/src/arrow/compute/api_scalar.cc
##########
@@ -785,5 +819,12 @@ Result<Datum> Week(const Datum& arg, WeekOptions options, 
ExecContext* ctx) {
   return CallFunction("week", {arg}, &options, ctx);
 }
 
+// ----------------------------------------------------------------------
+
+Result<Datum> MapArrayLookup(const Datum& arg, MapArrayLookupOptions options,
+                             ExecContext* ctx) {
+  return CallFunction("map_array_lookup", {arg}, &options, ctx);
+}
+

Review comment:
       Here is fine, I think
   
   Add a comment `// Structural transforms` below the `// ---` line to be 
consistent with the other sections




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to