This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new c1036681b0 GH-49956: [GLib] Add fallback data type for unknown
extension data type (#49969)
c1036681b0 is described below
commit c1036681b099c5f9b0684a710be04bb7619e926f
Author: Sutou Kouhei <[email protected]>
AuthorDate: Thu May 14 10:55:46 2026 +0900
GH-49956: [GLib] Add fallback data type for unknown extension data type
(#49969)
### Rationale for this change
Users can define arbitrary extension data types, so Arrow GLib may encounter
an extension data type that it doesn't know. We can't instantiate
`GArrowExtensionDataType` for such a type because it is an abstract class.
### What changes are included in this PR?
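This adds `GArrowUnknownExtensionDataType` and uses it as the fallback for all
unknown extension data types. `garrow_data_type_new_raw()` now maps the
`arrow.fixed_shape_tensor` and `arrow.uuid` extension names to their dedicated
GLib data types, and detects GLib-defined extension types with
`std::dynamic_pointer_cast` instead of `std::static_pointer_cast`.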
### Are these changes tested?
Yes. `test_converted_from_cpp` is added for the fixed shape tensor and UUID
data types.
### Are there any user-facing changes?
Yes. `GArrowUnknownExtensionDataType` is a new public class (available since
25.0).
* GitHub Issue: #49956
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
c_glib/arrow-glib/basic-data-type.cpp | 40 +++++++++++++++++++-----
c_glib/arrow-glib/basic-data-type.h | 13 ++++++++
c_glib/test/test-fixed-shape-tensor-data-type.rb | 5 +++
c_glib/test/test-uuid-data-type.rb | 5 +++
4 files changed, 56 insertions(+), 7 deletions(-)
diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp
index 87c5eed530..e7efcdcc12 100644
--- a/c_glib/arrow-glib/basic-data-type.cpp
+++ b/c_glib/arrow-glib/basic-data-type.cpp
@@ -131,6 +131,9 @@ G_BEGIN_DECLS
* #GArrowExtensionDataType is a base class for user-defined extension
* data types.
*
+ * #GArrowUnknownExtensionDataType is a class for unknown extension
+ * data types.
+ *
* #GArrowExtensionDataTypeRegistry is a class to manage extension
* data types.
*
@@ -2080,6 +2083,20 @@ namespace garrow {
G_BEGIN_DECLS
+G_DEFINE_TYPE(GArrowUnknownExtensionDataType,
+ garrow_unknown_extension_data_type,
+ GARROW_TYPE_EXTENSION_DATA_TYPE)
+
+static void
+garrow_unknown_extension_data_type_init(GArrowUnknownExtensionDataType *object)
+{
+}
+
+static void
+garrow_unknown_extension_data_type_class_init(GArrowUnknownExtensionDataTypeClass
*klass)
+{
+}
+
typedef struct GArrowExtensionDataTypeRegistryPrivate_
{
std::shared_ptr<arrow::ExtensionTypeRegistry> registry;
@@ -2720,16 +2737,25 @@
garrow_data_type_new_raw(std::shared_ptr<arrow::DataType> *arrow_data_type)
type = GARROW_TYPE_DURATION_DATA_TYPE;
break;
case arrow::Type::type::EXTENSION:
+ type = GARROW_TYPE_UNKNOWN_EXTENSION_DATA_TYPE;
{
- auto g_extension_data_type =
- std::static_pointer_cast<garrow::GExtensionType>(*arrow_data_type);
- if (g_extension_data_type) {
- auto garrow_data_type = g_extension_data_type->garrow_data_type();
- g_object_ref(garrow_data_type);
- return GARROW_DATA_TYPE(garrow_data_type);
+ auto arrow_extension_data_type =
+ std::static_pointer_cast<arrow::ExtensionType>(*arrow_data_type);
+ auto name = arrow_extension_data_type->extension_name();
+ if (name == "arrow.fixed_shape_tensor") {
+ type = GARROW_TYPE_FIXED_SHAPE_TENSOR_DATA_TYPE;
+ } else if (name == "arrow.uuid") {
+ type = GARROW_TYPE_UUID_DATA_TYPE;
+ } else {
+ auto g_extension_data_type =
+ std::dynamic_pointer_cast<garrow::GExtensionType>(*arrow_data_type);
+ if (g_extension_data_type) {
+ auto garrow_data_type = g_extension_data_type->garrow_data_type();
+ g_object_ref(garrow_data_type);
+ return GARROW_DATA_TYPE(garrow_data_type);
+ }
}
}
- type = GARROW_TYPE_EXTENSION_DATA_TYPE;
break;
case arrow::Type::type::FIXED_SIZE_LIST:
type = GARROW_TYPE_FIXED_SIZE_LIST_DATA_TYPE;
diff --git a/c_glib/arrow-glib/basic-data-type.h b/c_glib/arrow-glib/basic-data-type.h
index 960051d457..21f4be7c0c 100644
--- a/c_glib/arrow-glib/basic-data-type.h
+++ b/c_glib/arrow-glib/basic-data-type.h
@@ -758,6 +758,19 @@ GArrowChunkedArray *
garrow_extension_data_type_wrap_chunked_array(GArrowExtensionDataType
*data_type,
GArrowChunkedArray *storage);
+#define GARROW_TYPE_UNKNOWN_EXTENSION_DATA_TYPE
\
+ (garrow_unknown_extension_data_type_get_type())
+GARROW_AVAILABLE_IN_25_0
+G_DECLARE_DERIVABLE_TYPE(GArrowUnknownExtensionDataType,
+ garrow_unknown_extension_data_type,
+ GARROW,
+ UNKNOWN_EXTENSION_DATA_TYPE,
+ GArrowExtensionDataType)
+struct _GArrowUnknownExtensionDataTypeClass
+{
+ GArrowExtensionDataTypeClass parent_class;
+};
+
#define GARROW_TYPE_EXTENSION_DATA_TYPE_REGISTRY
\
(garrow_extension_data_type_registry_get_type())
GARROW_AVAILABLE_IN_3_0
diff --git a/c_glib/test/test-fixed-shape-tensor-data-type.rb b/c_glib/test/test-fixed-shape-tensor-data-type.rb
index abf8bf00db..a4d55c1d1b 100644
--- a/c_glib/test/test-fixed-shape-tensor-data-type.rb
+++ b/c_glib/test/test-fixed-shape-tensor-data-type.rb
@@ -104,4 +104,9 @@ class TestFixedShapeTensorDataType < Test::Unit::TestCase
assert_equal(message,
error.message.lines.first.chomp)
end
+
+ def test_converted_from_cpp
+ schema = Arrow::Schema.new([Arrow::Field.new("tensor", @data_type)])
+ assert_equal(@data_type, schema.fields[0].data_type)
+ end
end
diff --git a/c_glib/test/test-uuid-data-type.rb b/c_glib/test/test-uuid-data-type.rb
index 74db32c6eb..9fcafe461e 100644
--- a/c_glib/test/test-uuid-data-type.rb
+++ b/c_glib/test/test-uuid-data-type.rb
@@ -32,4 +32,9 @@ class TestUUIDDataType < Test::Unit::TestCase
def test_to_s
assert_equal("extension<arrow.uuid>", @data_type.to_s)
end
+
+ def test_converted_from_cpp
+ schema = Arrow::Schema.new([Arrow::Field.new("uuid", @data_type)])
+ assert_equal(@data_type, schema.fields[0].data_type)
+ end
end