This is an automated email from the ASF dual-hosted git repository.
rok pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new b4f7efe5bd GH-43787: [C++] Register the new Opaque extension type by
default (#43788)
b4f7efe5bd is described below
commit b4f7efe5bdc2218bb595b130b4f65237caecfa76
Author: Rok Mihevc <[email protected]>
AuthorDate: Thu Aug 22 14:45:00 2024 +0200
GH-43787: [C++] Register the new Opaque extension type by default (#43788)
This is to resolve #43787
> The Opaque extension type implementation for C++ (plus python bindings)
was added in https://github.com/apache/arrow/pull/43458, but it was not
registered by default, which we should do for canonical extension types (see
https://github.com/apache/arrow/pull/43458#issuecomment-2302551404)
Additionally, this makes the `bool8` extension type build even with
`ARROW_JSON=false`, as discussed
[here](https://github.com/apache/arrow/commit/525881987d0b9b4f464c3e3593a9a7b4e3c767d0#r145613657)
### Rationale for this change
Canonical types should be registered by default if possible (except e.g. if
they can't be compiled due to `ARROW_JSON=false`).
### What changes are included in this PR?
This adds default registration for `opaque`, changes when `bool8` is built
and moves all canonical tests under the same test target.
### Are these changes tested?
Changes are tested by previously existing tests.
### Are there any user-facing changes?
`opaque` will now be registered by default and `bool8` will be present even
when `ARROW_JSON=false` at build time.
* GitHub Issue: #43787
Authored-by: Rok Mihevc <[email protected]>
Signed-off-by: Rok Mihevc <[email protected]>
---
cpp/src/arrow/CMakeLists.txt | 2 +-
cpp/src/arrow/extension/CMakeLists.txt | 18 ++++++------------
cpp/src/arrow/extension/bool8.h | 2 ++
cpp/src/arrow/extension/bool8_test.cc | 1 -
cpp/src/arrow/extension/fixed_shape_tensor.h | 2 ++
cpp/src/arrow/extension/opaque.h | 2 ++
cpp/src/arrow/extension/opaque_test.cc | 2 --
cpp/src/arrow/extension_type.cc | 21 +++++++++++++--------
python/pyarrow/tests/test_extension_type.py | 5 ++---
9 files changed, 28 insertions(+), 27 deletions(-)
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index fb7253b6fd..89f28ee416 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -374,6 +374,7 @@ set(ARROW_SRCS
datum.cc
device.cc
extension_type.cc
+ extension/bool8.cc
pretty_print.cc
record_batch.cc
result.cc
@@ -906,7 +907,6 @@ endif()
if(ARROW_JSON)
arrow_add_object_library(ARROW_JSON
- extension/bool8.cc
extension/fixed_shape_tensor.cc
extension/opaque.cc
json/options.cc
diff --git a/cpp/src/arrow/extension/CMakeLists.txt
b/cpp/src/arrow/extension/CMakeLists.txt
index fcd5fa529a..5cb4bc77af 100644
--- a/cpp/src/arrow/extension/CMakeLists.txt
+++ b/cpp/src/arrow/extension/CMakeLists.txt
@@ -15,22 +15,16 @@
# specific language governing permissions and limitations
# under the License.
-add_arrow_test(test
- SOURCES
- bool8_test.cc
- PREFIX
- "arrow-extension-bool8")
+set(CANONICAL_EXTENSION_TESTS bool8_test.cc)
-add_arrow_test(test
- SOURCES
- fixed_shape_tensor_test.cc
- PREFIX
- "arrow-fixed-shape-tensor")
+if(ARROW_JSON)
+ list(APPEND CANONICAL_EXTENSION_TESTS fixed_shape_tensor_test.cc
opaque_test.cc)
+endif()
add_arrow_test(test
SOURCES
- opaque_test.cc
+ ${CANONICAL_EXTENSION_TESTS}
PREFIX
- "arrow-extension-opaque")
+ "arrow-canonical-extensions")
arrow_install_all_headers("arrow/extension")
diff --git a/cpp/src/arrow/extension/bool8.h b/cpp/src/arrow/extension/bool8.h
index 02e629b28a..fbb507639e 100644
--- a/cpp/src/arrow/extension/bool8.h
+++ b/cpp/src/arrow/extension/bool8.h
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+#pragma once
+
#include "arrow/extension_type.h"
namespace arrow::extension {
diff --git a/cpp/src/arrow/extension/bool8_test.cc
b/cpp/src/arrow/extension/bool8_test.cc
index eabcfcf62d..ee77332bc3 100644
--- a/cpp/src/arrow/extension/bool8_test.cc
+++ b/cpp/src/arrow/extension/bool8_test.cc
@@ -19,7 +19,6 @@
#include "arrow/io/memory.h"
#include "arrow/ipc/reader.h"
#include "arrow/ipc/writer.h"
-#include "arrow/testing/extension_type.h"
#include "arrow/testing/gtest_util.h"
namespace arrow {
diff --git a/cpp/src/arrow/extension/fixed_shape_tensor.h
b/cpp/src/arrow/extension/fixed_shape_tensor.h
index 20ec20a64c..80a602021c 100644
--- a/cpp/src/arrow/extension/fixed_shape_tensor.h
+++ b/cpp/src/arrow/extension/fixed_shape_tensor.h
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+#pragma once
+
#include "arrow/extension_type.h"
namespace arrow {
diff --git a/cpp/src/arrow/extension/opaque.h b/cpp/src/arrow/extension/opaque.h
index 9814b391cb..5d3411798f 100644
--- a/cpp/src/arrow/extension/opaque.h
+++ b/cpp/src/arrow/extension/opaque.h
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+#pragma once
+
#include "arrow/extension_type.h"
#include "arrow/type.h"
diff --git a/cpp/src/arrow/extension/opaque_test.cc
b/cpp/src/arrow/extension/opaque_test.cc
index 1629cdb396..16fcba3fa6 100644
--- a/cpp/src/arrow/extension/opaque_test.cc
+++ b/cpp/src/arrow/extension/opaque_test.cc
@@ -25,7 +25,6 @@
#include "arrow/ipc/reader.h"
#include "arrow/ipc/writer.h"
#include "arrow/record_batch.h"
-#include "arrow/testing/extension_type.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/type_fwd.h"
#include "arrow/util/checked_cast.h"
@@ -169,7 +168,6 @@ TEST(OpaqueType, MetadataRoundTrip) {
TEST(OpaqueType, BatchRoundTrip) {
auto type = internal::checked_pointer_cast<extension::OpaqueType>(
extension::opaque(binary(), "geometry", "adbc.postgresql"));
- ExtensionTypeGuard guard(type);
auto storage = ArrayFromJSON(binary(), R"(["foobar", null])");
auto array = ExtensionType::WrapArray(type, storage);
diff --git a/cpp/src/arrow/extension_type.cc b/cpp/src/arrow/extension_type.cc
index 685018f7de..83c7ebed4f 100644
--- a/cpp/src/arrow/extension_type.cc
+++ b/cpp/src/arrow/extension_type.cc
@@ -27,9 +27,10 @@
#include "arrow/array/util.h"
#include "arrow/chunked_array.h"
#include "arrow/config.h"
-#ifdef ARROW_JSON
#include "arrow/extension/bool8.h"
+#ifdef ARROW_JSON
#include "arrow/extension/fixed_shape_tensor.h"
+#include "arrow/extension/opaque.h"
#endif
#include "arrow/status.h"
#include "arrow/type.h"
@@ -143,17 +144,21 @@ static std::once_flag registry_initialized;
namespace internal {
static void CreateGlobalRegistry() {
+ // Register canonical extension types
+
g_registry = std::make_shared<ExtensionTypeRegistryImpl>();
+ std::vector<std::shared_ptr<DataType>> ext_types{extension::bool8()};
#ifdef ARROW_JSON
- // Register canonical extension types
- auto fst_ext_type =
-
checked_pointer_cast<ExtensionType>(extension::fixed_shape_tensor(int64(), {}));
- ARROW_CHECK_OK(g_registry->RegisterType(fst_ext_type));
-
- auto bool8_ext_type =
checked_pointer_cast<ExtensionType>(extension::bool8());
- ARROW_CHECK_OK(g_registry->RegisterType(bool8_ext_type));
+ ext_types.push_back(extension::fixed_shape_tensor(int64(), {}));
+ ext_types.push_back(extension::opaque(null(), "", ""));
#endif
+
+ // Register canonical extension types
+ for (const auto& ext_type : ext_types) {
+ ARROW_CHECK_OK(
+
g_registry->RegisterType(checked_pointer_cast<ExtensionType>(ext_type)));
+ }
}
} // namespace internal
diff --git a/python/pyarrow/tests/test_extension_type.py
b/python/pyarrow/tests/test_extension_type.py
index b04ee85ec9..0d50c467e9 100644
--- a/python/pyarrow/tests/test_extension_type.py
+++ b/python/pyarrow/tests/test_extension_type.py
@@ -1693,9 +1693,8 @@ def test_opaque_type(pickle_module, storage_type,
storage):
arr = pa.ExtensionArray.from_storage(opaque_type, storage)
assert isinstance(arr, opaque_arr_class)
- with registered_extension_type(opaque_type):
- buf = ipc_write_batch(pa.RecordBatch.from_arrays([arr], ["ext"]))
- batch = ipc_read_batch(buf)
+ buf = ipc_write_batch(pa.RecordBatch.from_arrays([arr], ["ext"]))
+ batch = ipc_read_batch(buf)
assert batch.column(0).type.extension_name == "arrow.opaque"
assert isinstance(batch.column(0), opaque_arr_class)