This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new be38880275 GH-44759: [GLib] Add garrow_record_batch_validate() (#45353)
be38880275 is described below
commit be388802759835b793b454a177505e72ff6638b2
Author: Hiroyuki Sato <[email protected]>
AuthorDate: Mon Jan 27 12:02:56 2025 +0900
GH-44759: [GLib] Add garrow_record_batch_validate() (#45353)
### Rationale for this change
[RecordBatch::Validate](https://arrow.apache.org/docs/cpp/api/table.html#_CPPv4NK5arrow11RecordBatch8ValidateEv)
is available in the C++ API, but GLib doesn't support that method yet.
### What changes are included in this PR?
This PR adds a validation method in the record-batch class.
Before this change, `Validate()` was called implicitly inside
`garrow_record_batch_new()`.
This PR removes the implicit validation from `garrow_record_batch_new()` and
exposes it as a separate method. Users now need to call
`garrow_record_batch_validate()` explicitly. This is a backward-incompatible
change.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
**This PR includes breaking changes to public APIs.**
* GitHub Issue: #44759
Authored-by: Hiroyuki Sato <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
c_glib/arrow-glib/record-batch.cpp | 23 +++++++++++++++++------
c_glib/arrow-glib/record-batch.h | 4 ++++
c_glib/test/test-record-batch.rb | 32 ++++++++++++++++++++++++++++++++
3 files changed, 53 insertions(+), 6 deletions(-)
diff --git a/c_glib/arrow-glib/record-batch.cpp
b/c_glib/arrow-glib/record-batch.cpp
index be9b361ae0..967b44538e 100644
--- a/c_glib/arrow-glib/record-batch.cpp
+++ b/c_glib/arrow-glib/record-batch.cpp
@@ -191,12 +191,7 @@ garrow_record_batch_new(GArrowSchema *schema,
}
auto arrow_record_batch = arrow::RecordBatch::Make(arrow_schema, n_rows,
arrow_columns);
- auto status = arrow_record_batch->Validate();
- if (garrow_error_check(error, status, tag)) {
- return garrow_record_batch_new_raw(&arrow_record_batch);
- } else {
- return NULL;
- }
+ return garrow_record_batch_new_raw(&arrow_record_batch);
}
/**
@@ -702,3 +697,19 @@
garrow_record_batch_iterator_get_raw(GArrowRecordBatchIterator *iterator)
auto priv = GARROW_RECORD_BATCH_ITERATOR_GET_PRIVATE(iterator);
return &priv->iterator;
}
+
+/**
+ * garrow_record_batch_validate:
+ * @record_batch: A #GArrowRecordBatch
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE on error.
+ *
+ * Since: 20.0.0
+ */
+gboolean
+garrow_record_batch_validate(GArrowRecordBatch *record_batch, GError **error)
+{
+ const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch);
+ return garrow::check(error, arrow_record_batch->Validate(),
"[record-batch][validate]");
+}
diff --git a/c_glib/arrow-glib/record-batch.h b/c_glib/arrow-glib/record-batch.h
index e7ffd83795..8d17a44be5 100644
--- a/c_glib/arrow-glib/record-batch.h
+++ b/c_glib/arrow-glib/record-batch.h
@@ -109,6 +109,10 @@ garrow_record_batch_serialize(GArrowRecordBatch
*record_batch,
GArrowWriteOptions *options,
GError **error);
+GARROW_AVAILABLE_IN_20_0
+gboolean
+garrow_record_batch_validate(GArrowRecordBatch *record_batch, GError **error);
+
#define GARROW_TYPE_RECORD_BATCH_ITERATOR
(garrow_record_batch_iterator_get_type())
GARROW_AVAILABLE_IN_0_17
G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchIterator,
diff --git a/c_glib/test/test-record-batch.rb b/c_glib/test/test-record-batch.rb
index bbdbf82d07..ba4b15a677 100644
--- a/c_glib/test/test-record-batch.rb
+++ b/c_glib/test/test-record-batch.rb
@@ -189,5 +189,37 @@ valid: [
assert_equal(@record_batch,
input_stream.read_record_batch(@record_batch.schema))
end
+
+ sub_test_case("#validate") do
+ def setup
+ @id_field = Arrow::Field.new("id", Arrow::UInt8DataType.new)
+ @name_field = Arrow::Field.new("name", Arrow::StringDataType.new)
+ @schema = Arrow::Schema.new([@id_field, @name_field])
+
+ @id_value = build_uint_array([1])
+ @name_value = build_string_array(["abc"])
+ @values = [@id_value, @name_value]
+ end
+
+ def test_valid
+ n_rows = @id_value.length
+ record_batch = Arrow::RecordBatch.new(@schema, n_rows, @values)
+
+ assert do
+ record_batch.validate
+ end
+ end
+
+ def test_invalid
+ message = "[record-batch][validate]: Invalid: " +
+ "Number of rows in column 0 did not match batch: 1 vs 2"
+ n_rows = @id_value.length + 1 # incorrect number of rows
+
+ record_batch = Arrow::RecordBatch.new(@schema, n_rows, @values)
+ assert_raise(Arrow::Error::Invalid.new(message)) do
+ record_batch.validate
+ end
+ end
+ end
end
end