This is an automated email from the ASF dual-hosted git repository.

amoeba pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-cookbook.git


The following commit(s) were added to refs/heads/main by this push:
     new 86555e1  GH-362: Build Arrow from scratch for dev cookbooks (#374)
86555e1 is described below

commit 86555e18e3c0920771b2ba7a8838a16facd461fc
Author: Bryce Mecum <[email protected]>
AuthorDate: Wed Apr 22 18:55:33 2026 -0700

    GH-362: Build Arrow from scratch for dev cookbooks (#374)
---
 .github/workflows/deploy_development_cookbooks.yml |  2 +
 .github/workflows/test_arrow_nightly_cookbook.yml  |  2 +
 Makefile                                           |  2 +-
 cpp/CONTRIBUTING.md                                |  3 +-
 cpp/code/CMakeLists.txt                            | 99 ++++++++++++++++------
 cpp/code/common.h                                  | 21 +----
 cpp/code/datasets.cc                               |  9 +-
 cpp/code/flight.cc                                 | 22 +++--
 cpp/code/main.cc                                   |  5 ++
 cpp/dev.yml                                        | 13 ++-
 10 files changed, 116 insertions(+), 62 deletions(-)

diff --git a/.github/workflows/deploy_development_cookbooks.yml b/.github/workflows/deploy_development_cookbooks.yml
index a8addeb..260ef07 100644
--- a/.github/workflows/deploy_development_cookbooks.yml
+++ b/.github/workflows/deploy_development_cookbooks.yml
@@ -66,6 +66,8 @@ jobs:
         run:
           echo ${CONDA_PREFIX}
       - name: Build cookbook
+        env:  # must be a mapping (not a list); CMake checks ENV{ARROW_NIGHTLY}
+          ARROW_NIGHTLY: 1
         run:
           make cpp
       - name: Upload cpp book
diff --git a/.github/workflows/test_arrow_nightly_cookbook.yml b/.github/workflows/test_arrow_nightly_cookbook.yml
index 5421d0f..b6b0b98 100644
--- a/.github/workflows/test_arrow_nightly_cookbook.yml
+++ b/.github/workflows/test_arrow_nightly_cookbook.yml
@@ -48,6 +48,8 @@ jobs:
   test_cpp_dev:
     name: "Test C++ Cookbook on Arrow Nightlies"
     runs-on: ubuntu-latest
+    env:
+      ARROW_NIGHTLY: 1
     defaults:
       run:
         shell: bash -l {0}
diff --git a/Makefile b/Makefile
index 1265435..06ceff9 100644
--- a/Makefile
+++ b/Makefile
@@ -88,7 +88,7 @@ cpptest:
        @echo ">>> Running C++ Tests/Snippets <<<\n"
        rm -rf cpp/recipe-test-build
        mkdir cpp/recipe-test-build
-       cd cpp/recipe-test-build && cmake ../code -DCMAKE_BUILD_TYPE=Release && cmake --build . && ctest --output-on-failure -j 1
+       cd cpp/recipe-test-build && cmake ../code -G Ninja -DCMAKE_BUILD_TYPE=Release && cmake --build . && ctest --output-on-failure -j 1
        mkdir -p cpp/build
        cp cpp/recipe-test-build/recipes_out.arrow cpp/build
 
diff --git a/cpp/CONTRIBUTING.md b/cpp/CONTRIBUTING.md
index a82b37c..2b9d849 100644
--- a/cpp/CONTRIBUTING.md
+++ b/cpp/CONTRIBUTING.md
@@ -95,7 +95,7 @@ output block when the recipe is rendered into the cookbook.
 ## Referencing Arrow C++ Documentation
 
 The Arrow project has its own documentation for the C++ implementation that
-is hosted at https://arrow.apache.org/docs/cpp/index.html. Fortunately,
+is hosted at <https://arrow.apache.org/docs/cpp/index.html>. Fortunately,
 this documentation is also built with Sphinx and so we can use the extension
 `intersphinx` to reference sections of this documentation. To do so simply
 write a standard Sphinx reference like so:
@@ -121,6 +121,7 @@ cmake build. For example:
 ```
 mkdir cpp/code/build
 cd cpp/code/build
+# Optional: Run `export ARROW_NIGHTLY=1` to build Arrow from git.
 cmake ../code -DCMAKE_BUILD_TYPE=Debug
 cmake --build .
 ctest
diff --git a/cpp/code/CMakeLists.txt b/cpp/code/CMakeLists.txt
index 46a15e9..7c9890f 100644
--- a/cpp/code/CMakeLists.txt
+++ b/cpp/code/CMakeLists.txt
@@ -18,23 +18,66 @@
 cmake_minimum_required(VERSION 3.19)
 project(arrow-cookbook)
 
-set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD 20)
 if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libstdc++")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libstdc++")
 endif()
 
 # Add Arrow and other required packages
-find_package(Arrow REQUIRED)
-if(NOT ${ARROW_VERSION} VERSION_GREATER "9.0.0")
-  get_filename_component(ARROW_CMAKE_BASE_DIR ${Arrow_CONFIG} DIRECTORY)
-  list(INSERT CMAKE_MODULE_PATH 0 ${ARROW_CMAKE_BASE_DIR})
+if(DEFINED ENV{ARROW_NIGHTLY})
+  set(CMAKE_BUILD_TYPE Debug)
+  set(ARROW_BUILD_SHARED True)
+  set(ARROW_DEPENDENCY_SOURCE "AUTO")
+  set(ARROW_ENABLE_THREADING ON)
+  set(ARROW_SIMD_LEVEL NONE) # macOS-specific workaround
+
+  set(ARROW_WITH_SNAPPY ON)
+
+  set(ARROW_ACERO ON)
+  set(ARROW_COMPUTE ON)
+  set(ARROW_DATASET ON)
+  set(ARROW_FILESYSTEM ON)
+  set(ARROW_FLIGHT ON)
+  set(ARROW_IPC ON)
+  set(ARROW_PARQUET ON)
+
+  include(FetchContent)
+
+  FetchContent_Declare(Arrow
+        GIT_REPOSITORY https://github.com/apache/arrow.git
+        GIT_TAG main
+        GIT_SHALLOW TRUE SOURCE_SUBDIR cpp
+        OVERRIDE_FIND_PACKAGE
+    )
+
+  FetchContent_MakeAvailable(Arrow)
+
+  # These are some Linux-only things the FetchContent build needs in order
+  # to compile
+  file(INSTALL "${arrow_BINARY_DIR}/src/arrow/util/config.h"
+        DESTINATION "${arrow_SOURCE_DIR}/cpp/src/arrow/util")
+  file(INSTALL "${arrow_BINARY_DIR}/src/parquet/parquet_version.h"
+        DESTINATION "${arrow_SOURCE_DIR}/cpp/src/parquet")
+  target_include_directories(
+        arrow_shared
+        SYSTEM INTERFACE "$<BUILD_INTERFACE:${arrow_SOURCE_DIR}/cpp/src>"
+    )
+  # Force FetchContent Arrow headers to the front of every target's include
+  # list so they take priority over any system Arrow headers added transitively
+  # (e.g. /opt/homebrew/include from GTest::gtest).  Without this the recipe
+  # executables compile against the older installed Arrow headers but link
+  # against the FetchContent Arrow runtime, causing ABI mismatches.
+  include_directories(BEFORE SYSTEM "${arrow_SOURCE_DIR}/cpp/src")
+
+else()
+  find_package(Arrow REQUIRED)
+  find_package(ArrowDataset REQUIRED)
+  find_package(ArrowFlight REQUIRED)
+  find_package(Parquet REQUIRED)
 endif()
-find_package(ArrowDataset REQUIRED)
-find_package(ArrowFlight REQUIRED)
-find_package(Parquet REQUIRED)
 
 if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-    set(CMAKE_CXX_CLANG_TIDY "clang-tidy")
+  set(CMAKE_CXX_CLANG_TIDY "clang-tidy")
 endif()
 
 # Create test targets
@@ -44,31 +87,36 @@ find_package(GTest REQUIRED)
 include(GoogleTest)
 
 function(RECIPE TARGET)
-    add_executable(
+  add_executable(
             ${TARGET}
             ${TARGET}.cc
             common.cc
             main.cc
     )
-    if(TARGET Arrow::arrow_shared)
-        target_link_libraries(
+  if(TARGET Arrow::arrow_shared)
+    target_link_libraries(
             ${TARGET}
             ArrowDataset::arrow_dataset_shared
             ArrowFlight::arrow_flight_shared GTest::gtest
         )
-    else()
-        target_link_libraries(parquet_shared INTERFACE arrow_shared)
-        target_link_libraries(arrow_dataset_shared INTERFACE parquet_shared)
-        target_link_libraries(arrow_flight_shared INTERFACE arrow_shared)
-        target_link_libraries(${TARGET} arrow_dataset_shared arrow_flight_shared GTest::gtest)
+  else()
+    target_link_libraries(parquet_shared INTERFACE arrow_shared)
+    target_link_libraries(arrow_dataset_shared INTERFACE parquet_shared)
+    target_link_libraries(arrow_flight_shared INTERFACE arrow_shared)
+    target_link_libraries(${TARGET} arrow_dataset_shared arrow_flight_shared GTest::gtest)
+  endif()
+  if (MSVC)
+    target_compile_options(${TARGET} PRIVATE /W4 /WX)
+  else ()
+    target_compile_options(${TARGET} PRIVATE -Wall -Wextra -Wpedantic -Werror)
+    # _Nullable/_Nonnull nullability annotations in absl macros trigger
+    # -Wnullability-extension under -Wpedantic; this is Clang-only.
+    if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+      target_compile_options(${TARGET} PRIVATE -Wno-nullability-extension)
     endif()
-    if (MSVC)
-        target_compile_options(${TARGET} PRIVATE /W4 /WX)
-    else ()
-        target_compile_options(${TARGET} PRIVATE -Wall -Wextra -Wpedantic -Werror)
-    endif ()
+  endif ()
 
-    gtest_discover_tests(${TARGET})
+  gtest_discover_tests(${TARGET})
 endfunction()
 
 recipe(basic_arrow)
@@ -76,10 +124,9 @@ recipe(creating_arrow_objects)
 recipe(datasets)
 recipe(flight)
 
-
 # Add protobuf to flight
-find_package(gRPC CONFIG REQUIRED)
 find_package(Threads)
+find_package(gRPC CONFIG REQUIRED)
 
 set(PROTO_FILES
     protos/helloworld.proto
diff --git a/cpp/code/common.h b/cpp/code/common.h
index 756ca81..018ee8a 100644
--- a/cpp/code/common.h
+++ b/cpp/code/common.h
@@ -18,30 +18,11 @@
 #ifndef ARROW_COOKBOOK_COMMON_H
 #define ARROW_COOKBOOK_COMMON_H
 
-#include <arrow/result.h>
-#include <arrow/status.h>
+#include <arrow/testing/gtest_util.h>
 
 #include <sstream>
 #include <string>
 
-#define ARROW_STRINGIFY(x) #x
-#define ARROW_CONCAT(x, y) x##y
-
-#define ARROW_ASSIGN_OR_RAISE_NAME(x, y) ARROW_CONCAT(x, y)
-
-#define ASSERT_OK(expr)                                                   \
-  for (const ::arrow::Status _st = ::arrow::ToStatus((expr)); !_st.ok();) \
-  FAIL() << "'" ARROW_STRINGIFY(expr) "' failed with " << _st.ToString()
-
-#define ASSIGN_OR_HANDLE_ERROR_IMPL(handle_error, status_name, lhs, rexpr) \
-  auto&& status_name = (rexpr);                                            \
-  handle_error(status_name.status());                                      \
-  lhs = std::move(status_name).ValueOrDie();
-
-#define ASSERT_OK_AND_ASSIGN(lhs, rexpr) \
-  ASSIGN_OR_HANDLE_ERROR_IMPL(           \
-      ASSERT_OK, ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), lhs, rexpr);
-
 inline std::stringstream rout;
 
 void StartRecipe(const std::string& recipe_name);
diff --git a/cpp/code/datasets.cc b/cpp/code/datasets.cc
index 8f0ba8b..3329fde 100644
--- a/cpp/code/datasets.cc
+++ b/cpp/code/datasets.cc
@@ -87,10 +87,15 @@ class DatasetReadingTest : public ::testing::Test {
                           fs->OpenInputFile(airquality_path));
     std::unique_ptr<parquet::ParquetFileReader> parquet_reader =
         parquet::ParquetFileReader::Open(file);
-    ARROW_ASSIGN_OR_RAISE(auto reader, parquet::arrow::FileReader::Make(
-        arrow::default_memory_pool(), std::move(parquet_reader)));
+    ARROW_ASSIGN_OR_RAISE(auto reader,
+                          parquet::arrow::FileReader::Make(arrow::default_memory_pool(),
+                                                           std::move(parquet_reader)));
     std::shared_ptr<arrow::Table> table;
+#if ARROW_VERSION_MAJOR >= 24
+    ARROW_ASSIGN_OR_RAISE(table, reader->ReadTable());
+#else
     ARROW_RETURN_NOT_OK(reader->ReadTable(&table));
+#endif
     return table;
   }
 
diff --git a/cpp/code/flight.cc b/cpp/code/flight.cc
index 7cd0317..038f43e 100644
--- a/cpp/code/flight.cc
+++ b/cpp/code/flight.cc
@@ -97,7 +97,11 @@ class ParquetStorageService : public arrow::flight::FlightServerBase {
         parquet::arrow::OpenFile(std::move(input), arrow::default_memory_pool()));
 
     std::shared_ptr<arrow::Table> table;
+#if ARROW_VERSION_MAJOR >= 24
+    ARROW_ASSIGN_OR_RAISE(table, reader->ReadTable());
+#else
     ARROW_RETURN_NOT_OK(reader->ReadTable(&table));
+#endif
     // Note that we can't directly pass TableBatchReader to
     // RecordBatchStream because TableBatchReader keeps a non-owning
     // reference to the underlying Table, which would then get freed
@@ -148,7 +152,7 @@ class ParquetStorageService : public arrow::flight::FlightServerBase {
     endpoint.ticket.ticket = file_info.base_name();
     arrow::flight::Location location;
     ARROW_ASSIGN_OR_RAISE(location,
-        arrow::flight::Location::ForGrpcTcp("localhost", port()));
+                          arrow::flight::Location::ForGrpcTcp("localhost", port()));
     endpoint.locations.push_back(location);
 
     int64_t total_records = reader->parquet_reader()->metadata()->num_rows();
@@ -197,7 +201,7 @@ arrow::Status TestPutGetDelete() {
 
   arrow::flight::Location server_location;
   ARROW_ASSIGN_OR_RAISE(server_location,
-      arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0));
+                        arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0));
 
   arrow::flight::FlightServerOptions options(server_location);
   auto server = std::unique_ptr<arrow::flight::FlightServerBase>(
@@ -209,7 +213,7 @@ arrow::Status TestPutGetDelete() {
   StartRecipe("ParquetStorageService::Connect");
   arrow::flight::Location location;
   ARROW_ASSIGN_OR_RAISE(location,
-      arrow::flight::Location::ForGrpcTcp("localhost", server->port()));
+                        arrow::flight::Location::ForGrpcTcp("localhost", server->port()));
 
   std::unique_ptr<arrow::flight::FlightClient> client;
   ARROW_ASSIGN_OR_RAISE(client, arrow::flight::FlightClient::Connect(location));
@@ -315,7 +319,7 @@ arrow::Status TestClientOptions() {
 
   arrow::flight::Location server_location;
   ARROW_ASSIGN_OR_RAISE(server_location,
-      arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0));
+                        arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0));
 
   arrow::flight::FlightServerOptions options(server_location);
   auto server = std::unique_ptr<arrow::flight::FlightServerBase>(
@@ -329,12 +333,12 @@ arrow::Status TestClientOptions() {
 
   arrow::flight::Location location;
   ARROW_ASSIGN_OR_RAISE(location,
-      arrow::flight::Location::ForGrpcTcp("localhost", server->port()));
+                        arrow::flight::Location::ForGrpcTcp("localhost", server->port()));
 
   std::unique_ptr<arrow::flight::FlightClient> client;
   // pass client_options into Connect()
   ARROW_ASSIGN_OR_RAISE(client,
-      arrow::flight::FlightClient::Connect(location, client_options));
+                        arrow::flight::FlightClient::Connect(location, client_options));
   rout << "Connected to " << location.ToString() << std::endl;
   EndRecipe("TestClientOptions::Connect");
 
@@ -352,7 +356,7 @@ arrow::Status TestCustomGrpcImpl() {
   StartRecipe("CustomGrpcImpl::StartServer");
   arrow::flight::Location server_location;
   ARROW_ASSIGN_OR_RAISE(server_location,
-      arrow::flight::Location::ForGrpcTcp("0.0.0.0", 5000));
+                        arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0));
 
   arrow::flight::FlightServerOptions options(server_location);
   auto server = std::unique_ptr<arrow::flight::FlightServerBase>(
@@ -372,8 +376,8 @@ arrow::Status TestCustomGrpcImpl() {
   EndRecipe("CustomGrpcImpl::StartServer");
 
   StartRecipe("CustomGrpcImpl::CreateClient");
-  auto client_channel =
-      grpc::CreateChannel("0.0.0.0:5000", grpc::InsecureChannelCredentials());
+  auto client_channel = grpc::CreateChannel("0.0.0.0:" + std::to_string(server->port()),
+                                            grpc::InsecureChannelCredentials());
 
   auto stub = HelloWorldService::NewStub(client_channel);
 
diff --git a/cpp/code/main.cc b/cpp/code/main.cc
index 3fbe3ea..e32e922 100644
--- a/cpp/code/main.cc
+++ b/cpp/code/main.cc
@@ -19,12 +19,17 @@
 
 #include <filesystem>
 
+#include <arrow/compute/api.h>
 #include <arrow/status.h>
 #include "gtest/gtest.h"
 
 #include "common.h"
 
 int main(int argc, char** argv) {
+  if (!arrow::compute::Initialize().ok()) {
+    std::cerr << "Failed to initialize Arrow compute functions" << std::endl;
+    return -1;
+  }
   testing::InitGoogleTest(&argc, argv);
   int retval = RUN_ALL_TESTS();
   if (retval == 0 && HasRecipeOutput()) {
diff --git a/cpp/dev.yml b/cpp/dev.yml
index d461f87..90eda6e 100644
--- a/cpp/dev.yml
+++ b/cpp/dev.yml
@@ -16,15 +16,22 @@
 
 name: cookbook-cpp-dev
 channels:
-  - arrow-nightlies
   - conda-forge
 dependencies:
   - python=3.10
   - compilers
-  - arrow-nightlies::libarrow
+  - cmake
+  - ninja
   - sphinx
   - gtest
   - gmock
-  - arrow-nightlies::pyarrow
   - clang-tools
   - zlib
+  - grpc-cpp
+  - protobuf
+  - abseil-cpp
+  - c-ares
+  - re2
+  - thrift-cpp
+  - rapidjson
+  - snappy

Reply via email to