This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 5ca26e8922 GH-37782: [C++] Add `CanReferenceFieldsByNames` method to
`arrow::StructArray` (#37823)
5ca26e8922 is described below
commit 5ca26e89228c272305aa2070ce8eb17a54e17640
Author: sgilmore10 <[email protected]>
AuthorDate: Mon Sep 25 16:26:48 2023 -0400
GH-37782: [C++] Add `CanReferenceFieldsByNames` method to
`arrow::StructArray` (#37823)
### Rationale for this change
`arrow::Schema` has a method called `CanReferenceFieldsByNames` which
callers can use prior to calling `GetFieldByName`. It would be nice if
`arrow::StructArray` also had `CanReferenceFieldsByNames` as a method.
I also think it would be nice to add a `CanReferenceFieldByName` method
that accepts a `std::string` instead of a `std::vector<std::string>` to
`StructArray` and `Schema`. That way, users wouldn't have to create a
`std::vector` containing one `std::string` when they just have one field name.
### What changes are included in this PR?
1. Added `CanReferenceFieldsByNames` method to `StructArray`
2. Added `CanReferenceFieldByName` method to `StructArray`
3. Added `CanReferenceFieldByName` method to `Schema`
### Are these changes tested?
Yes. I added unit tests for `CanReferenceFieldsByNames` and
`CanReferenceFieldByName` to `array_struct_test.cc` and `type_test.cc`.
### Are there any user-facing changes?
Yes. `CanReferenceFieldsByNames` and `CanReferenceFieldByName` can be
called on a `StructArray`. Users can also call `CanReferenceFieldByName` on a
`Schema`.
* Closes: #37782
Authored-by: Sarah Gilmore <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
cpp/src/arrow/array/array_nested.cc | 16 ++++++++++
cpp/src/arrow/array/array_nested.h | 6 ++++
cpp/src/arrow/array/array_struct_test.cc | 52 ++++++++++++++++++++++++++++++++
cpp/src/arrow/type.cc | 14 ++++++---
cpp/src/arrow/type.h | 3 ++
cpp/src/arrow/type_test.cc | 18 +++++++++++
6 files changed, 104 insertions(+), 5 deletions(-)
diff --git a/cpp/src/arrow/array/array_nested.cc
b/cpp/src/arrow/array/array_nested.cc
index df60074c78..d8308c8249 100644
--- a/cpp/src/arrow/array/array_nested.cc
+++ b/cpp/src/arrow/array/array_nested.cc
@@ -627,6 +627,22 @@ std::shared_ptr<Array> StructArray::GetFieldByName(const
std::string& name) cons
return i == -1 ? nullptr : field(i);
}
+Status StructArray::CanReferenceFieldByName(const std::string& name) const {
+ if (GetFieldByName(name) == nullptr) {
+ return Status::Invalid("Field named '", name,
+ "' not found or not unique in the struct.");
+ }
+ return Status::OK();
+}
+
+Status StructArray::CanReferenceFieldsByNames(
+ const std::vector<std::string>& names) const {
+ for (const auto& name : names) {
+ ARROW_RETURN_NOT_OK(CanReferenceFieldByName(name));
+ }
+ return Status::OK();
+}
+
Result<ArrayVector> StructArray::Flatten(MemoryPool* pool) const {
ArrayVector flattened;
flattened.resize(data_->child_data.size());
diff --git a/cpp/src/arrow/array/array_nested.h
b/cpp/src/arrow/array/array_nested.h
index 47c1db039c..8d5cc95fec 100644
--- a/cpp/src/arrow/array/array_nested.h
+++ b/cpp/src/arrow/array/array_nested.h
@@ -404,6 +404,12 @@ class ARROW_EXPORT StructArray : public Array {
/// Returns null if name not found
std::shared_ptr<Array> GetFieldByName(const std::string& name) const;
+ /// Indicate if field named `name` can be found unambiguously in the struct.
+ Status CanReferenceFieldByName(const std::string& name) const;
+
+ /// Indicate if fields named `names` can be found unambiguously in the
struct.
+ Status CanReferenceFieldsByNames(const std::vector<std::string>& names)
const;
+
/// \brief Flatten this array as a vector of arrays, one for each field
///
/// \param[in] pool The pool to allocate null bitmaps from, if necessary
diff --git a/cpp/src/arrow/array/array_struct_test.cc
b/cpp/src/arrow/array/array_struct_test.cc
index 318c83860e..73d53a7efa 100644
--- a/cpp/src/arrow/array/array_struct_test.cc
+++ b/cpp/src/arrow/array/array_struct_test.cc
@@ -303,6 +303,58 @@ TEST(StructArray, FlattenOfSlice) {
ASSERT_OK(arr->ValidateFull());
}
+TEST(StructArray, CanReferenceFieldByName) {
+ auto a = ArrayFromJSON(int8(), "[4, 5]");
+ auto b = ArrayFromJSON(int16(), "[6, 7]");
+ auto c = ArrayFromJSON(int32(), "[8, 9]");
+ auto d = ArrayFromJSON(int64(), "[10, 11]");
+ auto children = std::vector<std::shared_ptr<Array>>{a, b, c, d};
+
+ auto f0 = field("f0", int8());
+ auto f1 = field("f1", int16());
+ auto f2 = field("f2", int32());
+ auto f3 = field("f1", int64());
+ auto type = struct_({f0, f1, f2, f3});
+
+ auto arr = std::make_shared<StructArray>(type, 2, children);
+
+ ASSERT_OK(arr->CanReferenceFieldByName("f0"));
+ ASSERT_OK(arr->CanReferenceFieldByName("f2"));
+ // Not found
+ ASSERT_RAISES(Invalid, arr->CanReferenceFieldByName("nope"));
+
+ // Duplicates
+ ASSERT_RAISES(Invalid, arr->CanReferenceFieldByName("f1"));
+}
+
+TEST(StructArray, CanReferenceFieldsByNames) {
+ auto a = ArrayFromJSON(int8(), "[4, 5]");
+ auto b = ArrayFromJSON(int16(), "[6, 7]");
+ auto c = ArrayFromJSON(int32(), "[8, 9]");
+ auto d = ArrayFromJSON(int64(), "[10, 11]");
+ auto children = std::vector<std::shared_ptr<Array>>{a, b, c, d};
+
+ auto f0 = field("f0", int8());
+ auto f1 = field("f1", int16());
+ auto f2 = field("f2", int32());
+ auto f3 = field("f1", int64());
+ auto type = struct_({f0, f1, f2, f3});
+
+ auto arr = std::make_shared<StructArray>(type, 2, children);
+
+ ASSERT_OK(arr->CanReferenceFieldsByNames({"f0", "f2"}));
+ ASSERT_OK(arr->CanReferenceFieldsByNames({"f2", "f0"}));
+
+ // Not found
+ ASSERT_RAISES(Invalid, arr->CanReferenceFieldsByNames({"nope"}));
+ ASSERT_RAISES(Invalid, arr->CanReferenceFieldsByNames({"f0", "nope"}));
+ // Duplicates
+ ASSERT_RAISES(Invalid, arr->CanReferenceFieldsByNames({"f1"}));
+ ASSERT_RAISES(Invalid, arr->CanReferenceFieldsByNames({"f0", "f1"}));
+ // Both
+ ASSERT_RAISES(Invalid, arr->CanReferenceFieldsByNames({"f0", "f1", "nope"}));
+}
+
//
----------------------------------------------------------------------------------
// Struct test
class TestStructBuilder : public ::testing::Test {
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 3d294a3fa8..47bf52660f 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -1847,14 +1847,18 @@ std::vector<int> Schema::GetAllFieldIndices(const
std::string& name) const {
return result;
}
+Status Schema::CanReferenceFieldByName(const std::string& name) const {
+ if (GetFieldByName(name) == nullptr) {
+ return Status::Invalid("Field named '", name,
+ "' not found or not unique in the schema.");
+ }
+ return Status::OK();
+}
+
Status Schema::CanReferenceFieldsByNames(const std::vector<std::string>&
names) const {
for (const auto& name : names) {
- if (GetFieldByName(name) == nullptr) {
- return Status::Invalid("Field named '", name,
- "' not found or not unique in the schema.");
- }
+ ARROW_RETURN_NOT_OK(CanReferenceFieldByName(name));
}
-
return Status::OK();
}
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 718540d449..1991097928 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -2048,6 +2048,9 @@ class ARROW_EXPORT Schema : public
detail::Fingerprintable,
/// Return the indices of all fields having this name
std::vector<int> GetAllFieldIndices(const std::string& name) const;
+ /// Indicate if field named `name` can be found unambiguously in the schema.
+ Status CanReferenceFieldByName(const std::string& name) const;
+
/// Indicate if fields named `names` can be found unambiguously in the
schema.
Status CanReferenceFieldsByNames(const std::vector<std::string>& names)
const;
diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc
index c55b33b415..3dbefdcf0c 100644
--- a/cpp/src/arrow/type_test.cc
+++ b/cpp/src/arrow/type_test.cc
@@ -548,6 +548,24 @@ TEST_F(TestSchema, GetFieldDuplicates) {
ASSERT_EQ(results.size(), 0);
}
+TEST_F(TestSchema, CanReferenceFieldByName) {
+ auto f0 = field("f0", int32());
+ auto f1 = field("f1", uint8(), false);
+ auto f2 = field("f2", utf8());
+ auto f3 = field("f1", list(int16()));
+
+ auto schema = ::arrow::schema({f0, f1, f2, f3});
+
+ ASSERT_OK(schema->CanReferenceFieldByName("f0"));
+ ASSERT_OK(schema->CanReferenceFieldByName("f2"));
+
+ // Not found
+ ASSERT_RAISES(Invalid, schema->CanReferenceFieldByName("nope"));
+
+ // Duplicates
+ ASSERT_RAISES(Invalid, schema->CanReferenceFieldByName("f1"));
+}
+
TEST_F(TestSchema, CanReferenceFieldsByNames) {
auto f0 = field("f0", int32());
auto f1 = field("f1", uint8(), false);