This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 5ca26e8922 GH-37782: [C++] Add `CanReferenceFieldsByNames` method to 
`arrow::StructArray` (#37823)
5ca26e8922 is described below

commit 5ca26e89228c272305aa2070ce8eb17a54e17640
Author: sgilmore10 <[email protected]>
AuthorDate: Mon Sep 25 16:26:48 2023 -0400

    GH-37782: [C++] Add `CanReferenceFieldsByNames` method to 
`arrow::StructArray` (#37823)
    
    
    
    ### Rationale for this change
    
    `arrow::Schema` has a method called `CanReferenceFieldsByNames` which 
callers can use prior to calling `GetFieldByName`. It would be nice if 
`arrow::StructArray` also had `CanReferenceFieldsByNames` as a method.
    
    I also think it would be nice to add a `CanReferenceFieldByName` method 
that accepts a `std::string` instead of a `std::vector<std::string>` to 
`StructArray` and `Schema`. That way, users wouldn't have to create a 
`std::vector` containing one `std::string` when they just have one field name.
    
    ### What changes are included in this PR?
    
    1. Added `CanReferenceFieldsByNames` method to `StructArray`
    2. Added `CanReferenceFieldByName` method to `StructArray`
    3. Added `CanReferenceFieldByName` method to `Schema`
    
    ### Are these changes tested?
    
    Yes. I added unit tests for `CanReferenceFieldsByNames` and 
`CanReferenceFieldByName` to `array_struct_test.cc` and `type_test.cc`.
    
    ### Are there any user-facing changes?
    
    Yes. `CanReferenceFieldsByNames` and `CanReferenceFieldByName` can be 
called on a `StructArray`. Users can also call `CanReferenceFieldByName` on a 
`Schema`.
    
    * Closes: #37782
    
    Authored-by: Sarah Gilmore <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 cpp/src/arrow/array/array_nested.cc      | 16 ++++++++++
 cpp/src/arrow/array/array_nested.h       |  6 ++++
 cpp/src/arrow/array/array_struct_test.cc | 52 ++++++++++++++++++++++++++++++++
 cpp/src/arrow/type.cc                    | 14 ++++++---
 cpp/src/arrow/type.h                     |  3 ++
 cpp/src/arrow/type_test.cc               | 18 +++++++++++
 6 files changed, 104 insertions(+), 5 deletions(-)

diff --git a/cpp/src/arrow/array/array_nested.cc 
b/cpp/src/arrow/array/array_nested.cc
index df60074c78..d8308c8249 100644
--- a/cpp/src/arrow/array/array_nested.cc
+++ b/cpp/src/arrow/array/array_nested.cc
@@ -627,6 +627,22 @@ std::shared_ptr<Array> StructArray::GetFieldByName(const 
std::string& name) cons
   return i == -1 ? nullptr : field(i);
 }
 
+Status StructArray::CanReferenceFieldByName(const std::string& name) const {
+  if (GetFieldByName(name) == nullptr) {
+    return Status::Invalid("Field named '", name,
+                           "' not found or not unique in the struct.");
+  }
+  return Status::OK();
+}
+
+Status StructArray::CanReferenceFieldsByNames(
+    const std::vector<std::string>& names) const {
+  for (const auto& name : names) {
+    ARROW_RETURN_NOT_OK(CanReferenceFieldByName(name));
+  }
+  return Status::OK();
+}
+
 Result<ArrayVector> StructArray::Flatten(MemoryPool* pool) const {
   ArrayVector flattened;
   flattened.resize(data_->child_data.size());
diff --git a/cpp/src/arrow/array/array_nested.h 
b/cpp/src/arrow/array/array_nested.h
index 47c1db039c..8d5cc95fec 100644
--- a/cpp/src/arrow/array/array_nested.h
+++ b/cpp/src/arrow/array/array_nested.h
@@ -404,6 +404,12 @@ class ARROW_EXPORT StructArray : public Array {
   /// Returns null if name not found
   std::shared_ptr<Array> GetFieldByName(const std::string& name) const;
 
+  /// Indicate if field named `name` can be found unambiguously in the struct.
+  Status CanReferenceFieldByName(const std::string& name) const;
+
+  /// Indicate if fields named `names` can be found unambiguously in the 
struct.
+  Status CanReferenceFieldsByNames(const std::vector<std::string>& names) 
const;
+
   /// \brief Flatten this array as a vector of arrays, one for each field
   ///
   /// \param[in] pool The pool to allocate null bitmaps from, if necessary
diff --git a/cpp/src/arrow/array/array_struct_test.cc 
b/cpp/src/arrow/array/array_struct_test.cc
index 318c83860e..73d53a7efa 100644
--- a/cpp/src/arrow/array/array_struct_test.cc
+++ b/cpp/src/arrow/array/array_struct_test.cc
@@ -303,6 +303,58 @@ TEST(StructArray, FlattenOfSlice) {
   ASSERT_OK(arr->ValidateFull());
 }
 
+TEST(StructArray, CanReferenceFieldByName) {
+  auto a = ArrayFromJSON(int8(), "[4, 5]");
+  auto b = ArrayFromJSON(int16(), "[6, 7]");
+  auto c = ArrayFromJSON(int32(), "[8, 9]");
+  auto d = ArrayFromJSON(int64(), "[10, 11]");
+  auto children = std::vector<std::shared_ptr<Array>>{a, b, c, d};
+
+  auto f0 = field("f0", int8());
+  auto f1 = field("f1", int16());
+  auto f2 = field("f2", int32());
+  auto f3 = field("f1", int64());
+  auto type = struct_({f0, f1, f2, f3});
+
+  auto arr = std::make_shared<StructArray>(type, 2, children);
+
+  ASSERT_OK(arr->CanReferenceFieldByName("f0"));
+  ASSERT_OK(arr->CanReferenceFieldByName("f2"));
+  // Not found
+  ASSERT_RAISES(Invalid, arr->CanReferenceFieldByName("nope"));
+
+  // Duplicates
+  ASSERT_RAISES(Invalid, arr->CanReferenceFieldByName("f1"));
+}
+
+TEST(StructArray, CanReferenceFieldsByNames) {
+  auto a = ArrayFromJSON(int8(), "[4, 5]");
+  auto b = ArrayFromJSON(int16(), "[6, 7]");
+  auto c = ArrayFromJSON(int32(), "[8, 9]");
+  auto d = ArrayFromJSON(int64(), "[10, 11]");
+  auto children = std::vector<std::shared_ptr<Array>>{a, b, c, d};
+
+  auto f0 = field("f0", int8());
+  auto f1 = field("f1", int16());
+  auto f2 = field("f2", int32());
+  auto f3 = field("f1", int64());
+  auto type = struct_({f0, f1, f2, f3});
+
+  auto arr = std::make_shared<StructArray>(type, 2, children);
+
+  ASSERT_OK(arr->CanReferenceFieldsByNames({"f0", "f2"}));
+  ASSERT_OK(arr->CanReferenceFieldsByNames({"f2", "f0"}));
+
+  // Not found
+  ASSERT_RAISES(Invalid, arr->CanReferenceFieldsByNames({"nope"}));
+  ASSERT_RAISES(Invalid, arr->CanReferenceFieldsByNames({"f0", "nope"}));
+  // Duplicates
+  ASSERT_RAISES(Invalid, arr->CanReferenceFieldsByNames({"f1"}));
+  ASSERT_RAISES(Invalid, arr->CanReferenceFieldsByNames({"f0", "f1"}));
+  // Both
+  ASSERT_RAISES(Invalid, arr->CanReferenceFieldsByNames({"f0", "f1", "nope"}));
+}
+
 // 
----------------------------------------------------------------------------------
 // Struct test
 class TestStructBuilder : public ::testing::Test {
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 3d294a3fa8..47bf52660f 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -1847,14 +1847,18 @@ std::vector<int> Schema::GetAllFieldIndices(const 
std::string& name) const {
   return result;
 }
 
+Status Schema::CanReferenceFieldByName(const std::string& name) const {
+  if (GetFieldByName(name) == nullptr) {
+    return Status::Invalid("Field named '", name,
+                           "' not found or not unique in the schema.");
+  }
+  return Status::OK();
+}
+
 Status Schema::CanReferenceFieldsByNames(const std::vector<std::string>& 
names) const {
   for (const auto& name : names) {
-    if (GetFieldByName(name) == nullptr) {
-      return Status::Invalid("Field named '", name,
-                             "' not found or not unique in the schema.");
-    }
+    ARROW_RETURN_NOT_OK(CanReferenceFieldByName(name));
   }
-
   return Status::OK();
 }
 
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 718540d449..1991097928 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -2048,6 +2048,9 @@ class ARROW_EXPORT Schema : public 
detail::Fingerprintable,
   /// Return the indices of all fields having this name
   std::vector<int> GetAllFieldIndices(const std::string& name) const;
 
+  /// Indicate if field named `name` can be found unambiguously in the schema.
+  Status CanReferenceFieldByName(const std::string& name) const;
+
   /// Indicate if fields named `names` can be found unambiguously in the 
schema.
   Status CanReferenceFieldsByNames(const std::vector<std::string>& names) 
const;
 
diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc
index c55b33b415..3dbefdcf0c 100644
--- a/cpp/src/arrow/type_test.cc
+++ b/cpp/src/arrow/type_test.cc
@@ -548,6 +548,24 @@ TEST_F(TestSchema, GetFieldDuplicates) {
   ASSERT_EQ(results.size(), 0);
 }
 
+TEST_F(TestSchema, CanReferenceFieldByName) {
+  auto f0 = field("f0", int32());
+  auto f1 = field("f1", uint8(), false);
+  auto f2 = field("f2", utf8());
+  auto f3 = field("f1", list(int16()));
+
+  auto schema = ::arrow::schema({f0, f1, f2, f3});
+
+  ASSERT_OK(schema->CanReferenceFieldByName("f0"));
+  ASSERT_OK(schema->CanReferenceFieldByName("f2"));
+
+  // Not found
+  ASSERT_RAISES(Invalid, schema->CanReferenceFieldByName("nope"));
+
+  // Duplicates
+  ASSERT_RAISES(Invalid, schema->CanReferenceFieldByName("f1"));
+}
+
 TEST_F(TestSchema, CanReferenceFieldsByNames) {
   auto f0 = field("f0", int32());
   auto f1 = field("f1", uint8(), false);

Reply via email to