This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 9fd3d53  Add Arrow C Data Interface and nanoarrow (#44)
9fd3d53 is described below

commit 9fd3d53f3cd96b342f2ef16fe9d7e6eee9c49a2a
Author: Gang Wu <[email protected]>
AuthorDate: Fri Mar 21 16:52:52 2025 +0800

    Add Arrow C Data Interface and nanoarrow (#44)
    
    Closes #33
---
 .github/workflows/cpp-linter.yml                   |  5 +-
 .github/workflows/test.yml                         |  4 +-
 cmake_modules/IcebergThirdpartyToolchain.cmake     | 28 ++++++++
 src/iceberg/CMakeLists.txt                         | 27 +++++++-
 src/iceberg/arrow_c_data.h                         | 71 ++++++++++++++++++++
 src/iceberg/arrow_c_data_internal.cc               | 76 ++++++++++++++++++++++
 .../iceberg/arrow_c_data_internal.h                | 31 ++++-----
 test/CMakeLists.txt                                |  2 +
 test/{ => arrow}/CMakeLists.txt                    | 17 +++--
 test/arrow/arrow_test.cc                           | 62 ++++++++++++++++++
 test/{ => avro}/CMakeLists.txt                     | 16 ++---
 test/{core/avro_unittest.cc => avro/avro_test.cc}  |  0
 test/core/CMakeLists.txt                           |  8 ---
 13 files changed, 297 insertions(+), 50 deletions(-)

diff --git a/.github/workflows/cpp-linter.yml b/.github/workflows/cpp-linter.yml
index da58dca..b31e625 100644
--- a/.github/workflows/cpp-linter.yml
+++ b/.github/workflows/cpp-linter.yml
@@ -30,7 +30,10 @@ jobs:
   cpp-linter:
     runs-on: ubuntu-24.04
     steps:
-      - uses: actions/checkout@v4
+      - name: Checkout iceberg-cpp
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
       - name: Run build
         run: |
           mkdir build && cd build
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 5029414..43aebad 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -55,8 +55,8 @@ jobs:
         shell: bash
         run: ci/scripts/build_example.sh $(pwd)/example
   macos:
-    name: AArch64 macOS 14
-    runs-on: macos-14
+    name: AArch64 macOS 15
+    runs-on: macos-15
     timeout-minutes: 30
     strategy:
       fail-fast: false
diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake
index 0f95ea4..53b20c1 100644
--- a/cmake_modules/IcebergThirdpartyToolchain.cmake
+++ b/cmake_modules/IcebergThirdpartyToolchain.cmake
@@ -65,6 +65,10 @@ function(resolve_arrow_dependency)
   set(ARROW_BUILD_STATIC
       ON
       CACHE BOOL "" FORCE)
+  # To workaround https://github.com/apache/arrow/pull/45513
+  set(ARROW_IPC
+      ON
+      CACHE BOOL "" FORCE)
   set(ARROW_FILESYSTEM
       OFF
       CACHE BOOL "" FORCE)
@@ -198,3 +202,27 @@ endfunction()
 if(ICEBERG_AVRO)
   resolve_avro_dependency()
 endif()
+
+# ----------------------------------------------------------------------
+# Nanoarrow
+
+# It is also possible to vendor nanoarrow using the bundled source code.
+function(resolve_nanoarrow_dependency)
+  prepare_fetchcontent()
+
+  fetchcontent_declare(nanoarrow
+                       ${FC_DECLARE_COMMON_OPTIONS}
+                       URL "https://dlcdn.apache.org/arrow/apache-arrow-nanoarrow-0.6.0/apache-arrow-nanoarrow-0.6.0.tar.gz"
+  )
+  fetchcontent_makeavailable(nanoarrow)
+
+  set_target_properties(nanoarrow PROPERTIES OUTPUT_NAME "iceberg_vendored_nanoarrow"
+                                             POSITION_INDEPENDENT_CODE ON)
+  install(TARGETS nanoarrow
+          EXPORT iceberg_targets
+          RUNTIME DESTINATION "${ICEBERG_INSTALL_BINDIR}"
+          ARCHIVE DESTINATION "${ICEBERG_INSTALL_LIBDIR}"
+          LIBRARY DESTINATION "${ICEBERG_INSTALL_LIBDIR}")
+endfunction()
+
+resolve_nanoarrow_dependency()
diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt
index 8411c7a..0c1475b 100644
--- a/src/iceberg/CMakeLists.txt
+++ b/src/iceberg/CMakeLists.txt
@@ -15,13 +15,36 @@
 # specific language governing permissions and limitations
 # under the License.
 
-set(ICEBERG_SOURCES demo_table.cc schema.cc schema_field.cc type.cc)
+set(ICEBERG_SOURCES
+    arrow_c_data_internal.cc
+    demo_table.cc
+    schema.cc
+    schema_field.cc
+    type.cc)
+
+set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)
+set(ICEBERG_SHARED_BUILD_INTERFACE_LIBS)
+set(ICEBERG_STATIC_INSTALL_INTERFACE_LIBS)
+set(ICEBERG_SHARED_INSTALL_INTERFACE_LIBS)
+
+list(APPEND ICEBERG_STATIC_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow)
+list(APPEND ICEBERG_SHARED_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow)
+list(APPEND ICEBERG_STATIC_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow")
+list(APPEND ICEBERG_SHARED_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow")
 
 add_iceberg_lib(iceberg
                 SOURCES
                 ${ICEBERG_SOURCES}
                 PRIVATE_INCLUDES
-                ${ICEBERG_INCLUDES})
+                ${ICEBERG_INCLUDES}
+                SHARED_LINK_LIBS
+                ${ICEBERG_SHARED_BUILD_INTERFACE_LIBS}
+                STATIC_LINK_LIBS
+                ${ICEBERG_STATIC_BUILD_INTERFACE_LIBS}
+                STATIC_INSTALL_INTERFACE_LIBS
+                ${ICEBERG_STATIC_INSTALL_INTERFACE_LIBS}
+                SHARED_INSTALL_INTERFACE_LIBS
+                ${ICEBERG_SHARED_INSTALL_INTERFACE_LIBS})
 
 iceberg_install_all_headers(iceberg)
 
diff --git a/src/iceberg/arrow_c_data.h b/src/iceberg/arrow_c_data.h
new file mode 100644
index 0000000..43c2adb
--- /dev/null
+++ b/src/iceberg/arrow_c_data.h
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/arrow_c_data.h
+/// Arrow C data interface
+///
+/// The Arrow C Data interface (https://arrow.apache.org/docs/format/CDataInterface.html)
+/// is part of the Arrow Columnar Format specification
+/// (https://arrow.apache.org/docs/format/Columnar.html). See the Arrow documentation for
+/// documentation of these structures.
+
+#include <cstdint>
+
+#ifndef ARROW_C_DATA_INTERFACE
+#  define ARROW_C_DATA_INTERFACE
+
+extern "C" {
+struct ArrowSchema {
+  // Array type description
+  const char* format;
+  const char* name;
+  const char* metadata;
+  int64_t flags;
+  int64_t n_children;
+  struct ArrowSchema** children;
+  struct ArrowSchema* dictionary;
+
+  // Release callback
+  void (*release)(struct ArrowSchema*);
+  // Opaque producer-specific data
+  void* private_data;
+};
+
+struct ArrowArray {
+  // Array data description
+  int64_t length;
+  int64_t null_count;
+  int64_t offset;
+  int64_t n_buffers;
+  int64_t n_children;
+  const void** buffers;
+  struct ArrowArray** children;
+  struct ArrowArray* dictionary;
+
+  // Release callback
+  void (*release)(struct ArrowArray*);
+  // Opaque producer-specific data
+  void* private_data;
+};
+
+}  // extern "C"
+
+#endif  // ARROW_C_DATA_INTERFACE
diff --git a/src/iceberg/arrow_c_data_internal.cc b/src/iceberg/arrow_c_data_internal.cc
new file mode 100644
index 0000000..9716b25
--- /dev/null
+++ b/src/iceberg/arrow_c_data_internal.cc
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/arrow_c_data_internal.h"
+
+#include <array>
+#include <string>
+#include <utility>
+
+namespace iceberg::internal {
+
+std::pair<ArrowSchema, ArrowArray> CreateExampleArrowSchemaAndArrayByNanoarrow() {
+  ArrowSchema out_schema;
+
+  // Initializes the root struct schema
+  NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(&out_schema, NANOARROW_TYPE_STRUCT));
+  NANOARROW_THROW_NOT_OK(ArrowSchemaAllocateChildren(&out_schema, 2));
+
+  // Set up the non-nullable int64 field
+  struct ArrowSchema* int64_field = out_schema.children[0];
+  ArrowSchemaInit(int64_field);
+  NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(int64_field, NANOARROW_TYPE_INT64));
+  NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(int64_field, "id"));
+  int64_field->flags &= ~ARROW_FLAG_NULLABLE;
+
+  // Set up the nullable string field
+  struct ArrowSchema* string_field = out_schema.children[1];
+  ArrowSchemaInit(string_field);
+  NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(string_field, NANOARROW_TYPE_STRING));
+  NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(string_field, "name"));
+  string_field->flags |= ARROW_FLAG_NULLABLE;
+
+  constexpr int64_t kNumValues = 3;
+  std::array<int64_t, kNumValues> int64_values = {1, 2, 3};
+  std::array<std::string, kNumValues> string_values = {"a", "b", "c"};
+
+  ArrowArray out_array;
+  NANOARROW_THROW_NOT_OK(ArrowArrayInitFromSchema(&out_array, &out_schema, nullptr));
+  ArrowArray* int64_array = out_array.children[0];
+  ArrowArray* string_array = out_array.children[1];
+
+  NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(int64_array));
+  NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(string_array));
+
+  for (int64_t i = 0; i < kNumValues; i++) {
+    NANOARROW_THROW_NOT_OK(ArrowArrayAppendInt(int64_array, int64_values[i]));
+    NANOARROW_THROW_NOT_OK(
+        ArrowArrayAppendString(string_array, ArrowCharView(string_values[i].c_str())));
+  }
+
+  NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(int64_array, nullptr));
+  NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(string_array, nullptr));
+
+  out_array.length = kNumValues;
+  out_array.null_count = 0;
+
+  return {out_schema, out_array};
+}
+
+}  // namespace iceberg::internal
diff --git a/test/core/avro_unittest.cc b/src/iceberg/arrow_c_data_internal.h
similarity index 64%
copy from test/core/avro_unittest.cc
copy to src/iceberg/arrow_c_data_internal.h
index 5bdcfca..2d913c5 100644
--- a/test/core/avro_unittest.cc
+++ b/src/iceberg/arrow_c_data_internal.h
@@ -17,24 +17,17 @@
  * under the License.
  */
 
-#include <gtest/gtest.h>
-#include <iceberg/avro/demo_avro.h>
+#pragma once
 
-TEST(AVROTest, TestDemoAvro) {
-  std::string expected =
-      "{\n\
-    \"type\": \"record\",\n\
-    \"name\": \"testrecord\",\n\
-    \"fields\": [\n\
-        {\n\
-            \"name\": \"testbytes\",\n\
-            \"type\": \"bytes\",\n\
-            \"default\": \"\"\n\
-        }\n\
-    ]\n\
-}\n\
-";
+#include <nanoarrow/nanoarrow.hpp>
 
-  auto avro = iceberg::avro::DemoAvro();
-  EXPECT_EQ(avro.print(), expected);
-}
+namespace iceberg::internal {
+
+/**
+ * @brief Create a simple schema with non-nullable int64 and nullable string fields.
+ *
+ * This is the example code to demonstrate the usage of nanoarrow API.
+ */
+std::pair<ArrowSchema, ArrowArray> CreateExampleArrowSchemaAndArrayByNanoarrow();
+
+}  // namespace iceberg::internal
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index c8c7fdf..e29a76e 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -23,4 +23,6 @@ fetchcontent_declare(googletest
                      GTest)
 fetchcontent_makeavailable(googletest)
 
+add_subdirectory(arrow)
+add_subdirectory(avro)
 add_subdirectory(core)
diff --git a/test/CMakeLists.txt b/test/arrow/CMakeLists.txt
similarity index 66%
copy from test/CMakeLists.txt
copy to test/arrow/CMakeLists.txt
index c8c7fdf..0ef6586 100644
--- a/test/CMakeLists.txt
+++ b/test/arrow/CMakeLists.txt
@@ -15,12 +15,11 @@
 # specific language governing permissions and limitations
 # under the License.
 
-fetchcontent_declare(googletest
-                     GIT_REPOSITORY https://github.com/google/googletest.git
-                     GIT_TAG b514bdc898e2951020cbdca1304b75f5950d1f59 # release-1.15.2
-                     FIND_PACKAGE_ARGS
-                     NAMES
-                     GTest)
-fetchcontent_makeavailable(googletest)
-
-add_subdirectory(core)
+if(ICEBERG_ARROW)
+  add_executable(arrow_unittest)
+  target_sources(arrow_unittest PRIVATE arrow_test.cc)
+  target_link_libraries(arrow_unittest PRIVATE iceberg_arrow_static Arrow::arrow_static
+                                               GTest::gtest_main)
+  target_include_directories(arrow_unittest PRIVATE "${ICEBERG_INCLUDES}")
+  add_test(NAME arrow_unittest COMMAND arrow_unittest)
+endif()
diff --git a/test/arrow/arrow_test.cc b/test/arrow/arrow_test.cc
new file mode 100644
index 0000000..1d730fc
--- /dev/null
+++ b/test/arrow/arrow_test.cc
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow/api.h>
+#include <arrow/c/bridge.h>
+#include <arrow/result.h>
+#include <gtest/gtest.h>
+
+#include "iceberg/arrow_c_data_internal.h"
+
+namespace iceberg {
+
+TEST(ArrowCDataTest, CheckArrowSchemaAndArrayByNanoarrow) {
+  auto [schema, array] = internal::CreateExampleArrowSchemaAndArrayByNanoarrow();
+
+  auto arrow_schema = ::arrow::ImportSchema(&schema).ValueOrDie();
+  EXPECT_EQ(arrow_schema->num_fields(), 2);
+
+  auto id_field = arrow_schema->field(0);
+  EXPECT_EQ(id_field->name(), "id");
+  EXPECT_EQ(id_field->type()->id(), ::arrow::Type::INT64);
+  EXPECT_FALSE(id_field->nullable());
+
+  auto name_field = arrow_schema->field(1);
+  EXPECT_EQ(name_field->name(), "name");
+  EXPECT_EQ(name_field->type()->id(), ::arrow::Type::STRING);
+  EXPECT_TRUE(name_field->nullable());
+
+  auto arrow_record_batch = ::arrow::ImportRecordBatch(&array, arrow_schema).ValueOrDie();
+  EXPECT_EQ(arrow_record_batch->num_rows(), 3);
+  EXPECT_EQ(arrow_record_batch->num_columns(), 2);
+
+  auto id_column = arrow_record_batch->column(0);
+  EXPECT_EQ(id_column->type()->id(), ::arrow::Type::INT64);
+  EXPECT_EQ(id_column->GetScalar(0).ValueOrDie()->ToString(), "1");
+  EXPECT_EQ(id_column->GetScalar(1).ValueOrDie()->ToString(), "2");
+  EXPECT_EQ(id_column->GetScalar(2).ValueOrDie()->ToString(), "3");
+
+  auto name_column = arrow_record_batch->column(1);
+  EXPECT_EQ(name_column->type()->id(), ::arrow::Type::STRING);
+  EXPECT_EQ(name_column->GetScalar(0).ValueOrDie()->ToString(), "a");
+  EXPECT_EQ(name_column->GetScalar(1).ValueOrDie()->ToString(), "b");
+  EXPECT_EQ(name_column->GetScalar(2).ValueOrDie()->ToString(), "c");
+}
+
+}  // namespace iceberg
diff --git a/test/CMakeLists.txt b/test/avro/CMakeLists.txt
similarity index 69%
copy from test/CMakeLists.txt
copy to test/avro/CMakeLists.txt
index c8c7fdf..9cd1c0b 100644
--- a/test/CMakeLists.txt
+++ b/test/avro/CMakeLists.txt
@@ -15,12 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
-fetchcontent_declare(googletest
-                     GIT_REPOSITORY https://github.com/google/googletest.git
-                     GIT_TAG b514bdc898e2951020cbdca1304b75f5950d1f59 # release-1.15.2
-                     FIND_PACKAGE_ARGS
-                     NAMES
-                     GTest)
-fetchcontent_makeavailable(googletest)
-
-add_subdirectory(core)
+if(ICEBERG_AVRO)
+  add_executable(avro_unittest)
+  target_sources(avro_unittest PRIVATE avro_test.cc)
+  target_link_libraries(avro_unittest PRIVATE iceberg_avro_static GTest::gtest_main)
+  target_include_directories(avro_unittest PRIVATE "${ICEBERG_INCLUDES}")
+  add_test(NAME avro_unittest COMMAND avro_unittest)
+endif()
diff --git a/test/core/avro_unittest.cc b/test/avro/avro_test.cc
similarity index 100%
rename from test/core/avro_unittest.cc
rename to test/avro/avro_test.cc
diff --git a/test/core/CMakeLists.txt b/test/core/CMakeLists.txt
index 23b0844..6e82b9b 100644
--- a/test/core/CMakeLists.txt
+++ b/test/core/CMakeLists.txt
@@ -22,14 +22,6 @@ target_link_libraries(core_unittest PRIVATE iceberg_static GTest::gtest_main GTe
 target_include_directories(core_unittest PRIVATE "${ICEBERG_INCLUDES}")
 add_test(NAME core_unittest COMMAND core_unittest)
 
-if(ICEBERG_AVRO)
-  add_executable(avro_unittest)
-  target_sources(avro_unittest PRIVATE avro_unittest.cc)
-  target_link_libraries(avro_unittest PRIVATE iceberg_avro_static GTest::gtest_main)
-  target_include_directories(avro_unittest PRIVATE "${ICEBERG_INCLUDES}")
-  add_test(NAME avro_unittest COMMAND avro_unittest)
-endif()
-
 add_executable(expected_test)
 target_sources(expected_test PRIVATE expected_test.cc)
 target_link_libraries(expected_test PRIVATE iceberg_static GTest::gtest_main)

Reply via email to