This is an automated email from the ASF dual-hosted git repository. kou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push: new f055f5e554 GH-36886: [C++] Configure `azurite` in preparation for testing Azure C++ filesystem (#36988) f055f5e554 is described below commit f055f5e554020c10c343249e95b940a751eb3239 Author: Thomas Newton <thomas.w.new...@gmail.com> AuthorDate: Mon Aug 7 21:48:29 2023 +0100 GH-36886: [C++] Configure `azurite` in preparation for testing Azure C++ filesystem (#36988) ### Rationale for this change We need to write tests for https://github.com/apache/arrow/issues/18014. azurite is like a fake Azure blob storage, so it can be used to write integration tests. ### What changes are included in this PR? Extract the `azurite` related changes from https://github.com/apache/arrow/pull/12914 to create a smaller PR that's easier to review. I have made very minimal changes compared to that PR. Currently `azurite` is configured for all the environments where `ARROW_AZURE` was enabled by https://github.com/apache/arrow/pull/35701. I assume it's deliberate that it's not enabled yet for windows, alpine, conda, debian or fedora builds. ### Are these changes tested? It's tested, but there aren't really any good tests in this PR. I used this `azurite` config in https://github.com/apache/arrow/pull/36835 to make an integration test that uses the Azure C++ SDK. On its own we can't really write tests for this `azurite` setup PR. ### Are there any user-facing changes? 
No * Closes: #36886 Lead-authored-by: Thomas Newton <thomas.w.new...@gmail.com> Co-authored-by: Sutou Kouhei <k...@cozmixng.org> Signed-off-by: Sutou Kouhei <k...@clear-code.com> --- .github/workflows/cpp.yml | 4 ++ ci/docker/ubuntu-20.04-cpp.dockerfile | 4 ++ ci/docker/ubuntu-22.04-cpp.dockerfile | 4 ++ cpp/Brewfile => ci/scripts/install_azurite.sh | 44 +++++++------- cpp/Brewfile | 1 + cpp/src/arrow/filesystem/azurefs_test.cc | 82 ++++++++++++++++++++++++++- 6 files changed, 114 insertions(+), 25 deletions(-) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index cd12be1148..eaccf25403 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -23,6 +23,7 @@ on: - '.github/workflows/cpp.yml' - 'ci/docker/**' - 'ci/scripts/cpp_*' + - 'ci/scripts/install_azurite.sh' - 'ci/scripts/install_gcs_testbench.sh' - 'ci/scripts/install_minio.sh' - 'ci/scripts/msys2_*' @@ -34,6 +35,7 @@ on: - '.github/workflows/cpp.yml' - 'ci/docker/**' - 'ci/scripts/cpp_*' + - 'ci/scripts/install_azurite.sh' - 'ci/scripts/install_gcs_testbench.sh' - 'ci/scripts/install_minio.sh' - 'ci/scripts/msys2_*' @@ -201,6 +203,8 @@ jobs: ci/scripts/install_minio.sh latest /usr/local - name: Install Google Cloud Storage Testbench run: ci/scripts/install_gcs_testbench.sh default + - name: Install Azurite Storage Emulator + run: ci/scripts/install_azurite.sh - name: Setup ccache run: | ci/scripts/ccache_setup.sh diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index f94494177e..125f1f48d4 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -103,6 +103,7 @@ RUN apt-get update -y -q && \ make \ ninja-build \ nlohmann-json3-dev \ + npm \ pkg-config \ protobuf-compiler \ python3-dev \ @@ -123,6 +124,9 @@ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default +COPY 
ci/scripts/install_azurite.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_azurite.sh + COPY ci/scripts/install_ceph.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_ceph.sh diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index e773c6f1ee..0840b3fa5c 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -102,6 +102,7 @@ RUN apt-get update -y -q && \ make \ ninja-build \ nlohmann-json3-dev \ + npm \ pkg-config \ protobuf-compiler \ protobuf-compiler-grpc \ @@ -153,6 +154,9 @@ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default +COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_azurite.sh + COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin diff --git a/cpp/Brewfile b/ci/scripts/install_azurite.sh old mode 100644 new mode 100755 similarity index 69% copy from cpp/Brewfile copy to ci/scripts/install_azurite.sh index 580e8d3f11..2e7008360f --- a/cpp/Brewfile +++ b/ci/scripts/install_azurite.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash +# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -15,27 +17,21 @@ # specific language governing permissions and limitations # under the License. 
-brew "aws-sdk-cpp" -brew "bash" -brew "boost" -brew "brotli" -brew "c-ares" -brew "ccache" -brew "cmake" -brew "flatbuffers" -brew "git" -brew "glog" -brew "googletest" -brew "grpc" -brew "llvm@14" -brew "lz4" -brew "ninja" -brew "openssl@3" -brew "protobuf" -brew "python" -brew "rapidjson" -brew "snappy" -brew "thrift" -brew "wget" -brew "xsimd" -brew "zstd" +set -e + +case "$(uname)" in + Darwin) + npm install -g azurite + which azurite + ;; + MINGW*) + choco install nodejs.install + npm install -g azurite + ;; + Linux) + npm install -g azurite + which azurite + ;; +esac +echo "node version = $(node --version)" +echo "azurite version = $(azurite --version)" \ No newline at end of file diff --git a/cpp/Brewfile b/cpp/Brewfile index 580e8d3f11..58015d2121 100644 --- a/cpp/Brewfile +++ b/cpp/Brewfile @@ -30,6 +30,7 @@ brew "grpc" brew "llvm@14" brew "lz4" brew "ninja" +brew "node" brew "openssl@3" brew "protobuf" brew "python" diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 0f03e88393..e940c5bd1b 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -15,7 +15,26 @@ // specific language governing permissions and limitations // under the License. +#include <algorithm> // Missing include in boost/process + +// This boost/asio/io_context.hpp include is needless for no MinGW +// build. +// +// This is for including boost/asio/detail/socket_types.hpp before any +// "#include <windows.h>". boost/asio/detail/socket_types.hpp doesn't +// work if windows.h is already included. boost/process.h -> +// boost/process/args.hpp -> boost/process/detail/basic_cmd.hpp +// includes windows.h. boost/process/args.hpp is included before +// boost/process/async.h that includes +// boost/asio/detail/socket_types.hpp implicitly is included. +#include <boost/asio/io_context.hpp> +// We need BOOST_USE_WINDOWS_H definition with MinGW when we use +// boost/process.hpp. 
See BOOST_USE_WINDOWS_H=1 in +// cpp/cmake_modules/ThirdpartyToolchain.cmake for details. +#include <boost/process.hpp> + #include "arrow/filesystem/azurefs.h" +#include "arrow/util/io_util.h" #include <gmock/gmock-matchers.h> #include <gmock/gmock-more-matchers.h> @@ -27,15 +46,76 @@ #include "arrow/testing/util.h" namespace arrow { +using internal::TemporaryDir; namespace fs { namespace { +namespace bp = boost::process; using ::testing::IsEmpty; using ::testing::Not; using ::testing::NotNull; -// Placeholder test for file structure +class AzuriteEnv : public ::testing::Environment { + public: + AzuriteEnv() { + account_name_ = "devstoreaccount1"; + account_key_ = + "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/" + "KBHBeksoGMGw=="; + auto exe_path = bp::search_path("azurite"); + if (exe_path.empty()) { + auto error = std::string("Could not find Azurite emulator."); + status_ = Status::Invalid(error); + return; + } + auto temp_dir_ = *TemporaryDir::Make("azurefs-test-"); + server_process_ = bp::child(boost::this_process::environment(), exe_path, "--silent", + "--location", temp_dir_->path().ToString(), "--debug", + temp_dir_->path().ToString() + "/debug.log"); + if (!(server_process_.valid() && server_process_.running())) { + auto error = "Could not start Azurite emulator."; + server_process_.terminate(); + server_process_.wait(); + status_ = Status::Invalid(error); + return; + } + status_ = Status::OK(); + } + + ~AzuriteEnv() override { + server_process_.terminate(); + server_process_.wait(); + } + + const std::string& account_name() const { return account_name_; } + const std::string& account_key() const { return account_key_; } + const Status status() const { return status_; } + + private: + std::string account_name_; + std::string account_key_; + bp::child server_process_; + Status status_; + std::unique_ptr<TemporaryDir> temp_dir_; +}; + +auto* azurite_env = ::testing::AddGlobalTestEnvironment(new AzuriteEnv); + +AzuriteEnv* 
GetAzuriteEnv() { + return ::arrow::internal::checked_cast<AzuriteEnv*>(azurite_env); +} + +// Placeholder tests for file structure // TODO: GH-18014 Remove once a proper test is added +TEST(AzureFileSystem, InitialiseAzurite) { + const std::string& account_name = GetAzuriteEnv()->account_name(); + const std::string& account_key = GetAzuriteEnv()->account_key(); + EXPECT_EQ(account_name, "devstoreaccount1"); + EXPECT_EQ(account_key, + "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/" + "K1SZFPTOtr/KBHBeksoGMGw=="); +} + TEST(AzureFileSystem, OptionsCompare) { AzureOptions options; EXPECT_TRUE(options.Equals(options));