emkornfield commented on a change in pull request #11331: URL: https://github.com/apache/arrow/pull/11331#discussion_r724314084
########## File path: cpp/src/arrow/filesystem/gcsfs.cc ########## @@ -0,0 +1,143 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/filesystem/gcsfs.h" + +#include <google/cloud/storage/client.h> + +#include <sstream> + +#include "arrow/filesystem/path_util.h" +#include "arrow/result.h" +#include "arrow/util/checked_cast.h" + +namespace arrow { +namespace fs { + +namespace gcs = google::cloud::storage; + +google::cloud::Options AsGoogleCloudOptions(GcsOptions const& o) { + auto options = google::cloud::Options{}; Review comment: Is there a reason to prefer this over `google::cloud::Options options`; ? ########## File path: cpp/src/arrow/filesystem/gcsfs.cc ########## @@ -0,0 +1,143 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/filesystem/gcsfs.h" + +#include <google/cloud/storage/client.h> + +#include <sstream> + +#include "arrow/filesystem/path_util.h" +#include "arrow/result.h" +#include "arrow/util/checked_cast.h" + +namespace arrow { +namespace fs { + +namespace gcs = google::cloud::storage; + +google::cloud::Options AsGoogleCloudOptions(GcsOptions const& o) { + auto options = google::cloud::Options{}; + if (!o.endpoint_override.empty()) { + auto scheme = o.scheme; + if (scheme.empty()) scheme = "https"; + options.set<gcs::RestEndpointOption>(scheme + "://" + o.endpoint_override); + } + return options; +} + +class GcsFileSystem::Impl { + public: + explicit Impl(GcsOptions const& o) : client_(AsGoogleCloudOptions(o)) {} + + private: + gcs::Client client_; +}; + +std::string GcsFileSystem::type_name() const { return "gcs"; } + +bool GcsFileSystem::Equals(const FileSystem& other) const { + if (this == &other) { + return true; + } + if (other.type_name() != type_name()) { + return false; + } + const auto& fs = ::arrow::internal::checked_cast<const GcsFileSystem&>(other); + return impl_ == fs.impl_; +} + +Result<FileInfo> GcsFileSystem::GetFileInfo(const std::string& path) { + return Status::NotImplemented("The GCS FileSystem is not fully implemented"); +} + +Result<FileInfoVector> GcsFileSystem::GetFileInfo(const FileSelector& select) { + return Status::NotImplemented("The GCS FileSystem is not fully implemented"); +} + +Status GcsFileSystem::CreateDir(const std::string& path, bool recursive) { + return Status::NotImplemented("The GCS FileSystem is not 
fully implemented"); +} + +Status GcsFileSystem::DeleteDir(const std::string& path) { + return Status::NotImplemented("The GCS FileSystem is not fully implemented"); +} + +Status GcsFileSystem::DeleteDirContents(const std::string& path) { + return Status::NotImplemented("The GCS FileSystem is not fully implemented"); +} + +Status GcsFileSystem::DeleteRootDirContents() { + return Status::NotImplemented("The GCS FileSystem is not fully implemented"); +} + +Status GcsFileSystem::DeleteFile(const std::string& path) { + return Status::NotImplemented("The GCS FileSystem is not fully implemented"); +} + +Status GcsFileSystem::Move(const std::string& src, const std::string& dest) { + return Status::NotImplemented("The GCS FileSystem is not fully implemented"); +} + +Status GcsFileSystem::CopyFile(const std::string& src, const std::string& dest) { + return Status::NotImplemented("The GCS FileSystem is not fully implemented"); +} + +Result<std::shared_ptr<io::InputStream>> GcsFileSystem::OpenInputStream( + const std::string& path) { + return Status::NotImplemented("The GCS FileSystem is not fully implemented"); +} + +Result<std::shared_ptr<io::InputStream>> GcsFileSystem::OpenInputStream( + const FileInfo& info) { + return Status::NotImplemented("The GCS FileSystem is not fully implemented"); +} + +Result<std::shared_ptr<io::RandomAccessFile>> GcsFileSystem::OpenInputFile( + const std::string& path) { + return Status::NotImplemented("The GCS FileSystem is not fully implemented"); +} + +Result<std::shared_ptr<io::RandomAccessFile>> GcsFileSystem::OpenInputFile( + const FileInfo& info) { + return Status::NotImplemented("The GCS FileSystem is not fully implemented"); +} + +Result<std::shared_ptr<io::OutputStream>> GcsFileSystem::OpenOutputStream( + const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) { + return Status::NotImplemented("The GCS FileSystem is not fully implemented"); +} + +Result<std::shared_ptr<io::OutputStream>> 
GcsFileSystem::OpenAppendStream( + const std::string&, const std::shared_ptr<const KeyValueMetadata>&) { + return Status::NotImplemented("Append is not supported in GCS"); +} + +GcsFileSystem::GcsFileSystem(const GcsOptions& options, const io::IOContext& context) + : FileSystem(context), impl_(std::make_shared<Impl>(options)) {} + +namespace internal { + +std::shared_ptr<GcsFileSystem> MakeGcsFileSystemForTest(const GcsOptions& options) { + return std::shared_ptr<GcsFileSystem>( Review comment: maybe comment on why std::make_shared isn't used. ########## File path: cpp/src/arrow/filesystem/gcsfs_test.cc ########## @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/filesystem/gcsfs.h" + +#include <gmock/gmock-matchers.h> +#include <gmock/gmock-more-matchers.h> +#include <gtest/gtest.h> + +#include <string> + +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/util.h" + +namespace arrow { +namespace fs { +namespace { + +using ::testing::IsEmpty; +using ::testing::Not; +using ::testing::NotNull; + +TEST(GCSFileSystem, Compare) { + auto a = internal::MakeGcsFileSystemForTest(GcsOptions{}); + EXPECT_THAT(a.get(), NotNull()); + EXPECT_EQ(a, a); + + auto b = internal::MakeGcsFileSystemForTest(GcsOptions{}); + EXPECT_THAT(b.get(), NotNull()); + EXPECT_EQ(b, b); + + EXPECT_NE(a, b); Review comment: I think Equals here is meant to mean logically equal; shouldn't this be equal? ########## File path: cpp/src/arrow/filesystem/gcsfs_test.cc ########## @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/filesystem/gcsfs.h" + +#include <gmock/gmock-matchers.h> +#include <gmock/gmock-more-matchers.h> +#include <gtest/gtest.h> + +#include <string> + +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/util.h" + +namespace arrow { +namespace fs { +namespace { + +using ::testing::IsEmpty; +using ::testing::Not; +using ::testing::NotNull; + +TEST(GCSFileSystem, Compare) { + auto a = internal::MakeGcsFileSystemForTest(GcsOptions{}); + EXPECT_THAT(a.get(), NotNull()); + EXPECT_EQ(a, a); Review comment: should this be comparing the objects `EXPECT_EQ(*a, *a)` ########## File path: cpp/src/arrow/filesystem/gcsfs.cc ########## @@ -0,0 +1,143 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/filesystem/gcsfs.h" + +#include <google/cloud/storage/client.h> + +#include <sstream> + +#include "arrow/filesystem/path_util.h" +#include "arrow/result.h" +#include "arrow/util/checked_cast.h" + +namespace arrow { +namespace fs { + +namespace gcs = google::cloud::storage; + +google::cloud::Options AsGoogleCloudOptions(GcsOptions const& o) { + auto options = google::cloud::Options{}; + if (!o.endpoint_override.empty()) { + auto scheme = o.scheme; Review comment: please spell out the type. 
########## File path: cpp/src/arrow/filesystem/gcsfs_test.cc ########## @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/filesystem/gcsfs.h" + +#include <gmock/gmock-matchers.h> +#include <gmock/gmock-more-matchers.h> +#include <gtest/gtest.h> + +#include <string> + +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/util.h" + +namespace arrow { +namespace fs { +namespace { + +using ::testing::IsEmpty; +using ::testing::Not; +using ::testing::NotNull; + +TEST(GCSFileSystem, Compare) { + auto a = internal::MakeGcsFileSystemForTest(GcsOptions{}); + EXPECT_THAT(a.get(), NotNull()); + EXPECT_EQ(a, a); + + auto b = internal::MakeGcsFileSystemForTest(GcsOptions{}); + EXPECT_THAT(b.get(), NotNull()); Review comment: I don't think .get() is necessary here and above. ########## File path: cpp/src/arrow/filesystem/gcsfs.cc ########## @@ -0,0 +1,143 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/filesystem/gcsfs.h" + +#include <google/cloud/storage/client.h> + +#include <sstream> + +#include "arrow/filesystem/path_util.h" +#include "arrow/result.h" +#include "arrow/util/checked_cast.h" + +namespace arrow { +namespace fs { + +namespace gcs = google::cloud::storage; + +google::cloud::Options AsGoogleCloudOptions(GcsOptions const& o) { Review comment: const should be in front of GcsOptions? ########## File path: cpp/src/arrow/filesystem/gcsfs.h ########## @@ -0,0 +1,130 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include <memory> +#include <string> +#include <vector> + +#include "arrow/filesystem/filesystem.h" + +namespace arrow { +namespace fs { +class GcsFileSystem; +struct GcsOptions; +namespace internal { +// TODO(ARROW-1231) - during development only tests should create a GcsFileSystem. +// Remove, and provide a public API, before declaring the feature complete. +std::shared_ptr<GcsFileSystem> MakeGcsFileSystemForTest(const GcsOptions& options); Review comment: Does the TODO cover making a static method [Make](https://github.com/apache/arrow/blob/master/cpp/src/arrow/filesystem/s3fs.h#L281) on GcsFileSystem? ########## File path: cpp/src/arrow/filesystem/gcsfs.h ########## @@ -0,0 +1,111 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <memory> +#include <string> +#include <vector> + +#include "arrow/filesystem/filesystem.h" + +namespace arrow { +namespace fs { +class GCSFileSystem; +struct GCSOptions; +namespace internal { +// TODO(ARROW-1231) - during development only tests should create a GCSFileSystem. +// Remove before declaring the feature complete. 
+std::shared_ptr<GCSFileSystem> MakeGCSFileSystemForTest(const GCSOptions& options); +} // namespace internal + +struct ARROW_EXPORT GCSOptions { + std::string endpoint_override; + std::string scheme; + + bool Equals(const GCSOptions& other) const; + + /// \brief Initialize with default credentials provider chain + /// + /// This is recommended if you use the standard AWS environment variables + /// and/or configuration file. + static GCSOptions Defaults(); + + /// \brief Initialize with anonymous credentials. + /// + /// This will only let you access public buckets. + static GCSOptions Anonymous(); +}; + +class ARROW_EXPORT GCSFileSystem : public FileSystem { Review comment: Generally, Google style. There are some cases where we've used the all-caps spelling, though (e.g. CSV). ########## File path: cpp/src/arrow/filesystem/gcsfs.cc ########## @@ -0,0 +1,143 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/filesystem/gcsfs.h" + +#include <google/cloud/storage/client.h> + +#include <sstream> + +#include "arrow/filesystem/path_util.h" +#include "arrow/result.h" +#include "arrow/util/checked_cast.h" + +namespace arrow { +namespace fs { + +namespace gcs = google::cloud::storage; + +google::cloud::Options AsGoogleCloudOptions(GcsOptions const& o) { + auto options = google::cloud::Options{}; Review comment: Yeah, the "almost always use auto" guideline (my understanding is that this is from Effective C++) directly contradicts the style guide in use (which is to only use auto if the type is obvious). In this case the type is obvious; it just seems like there is more text. ########## File path: cpp/src/arrow/filesystem/gcsfs.h ########## @@ -0,0 +1,130 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <memory> +#include <string> +#include <vector> + +#include "arrow/filesystem/filesystem.h" + +namespace arrow { +namespace fs { +class GcsFileSystem; +struct GcsOptions; +namespace internal { +// TODO(ARROW-1231) - during development only tests should create a GcsFileSystem. +// Remove, and provide a public API, before declaring the feature complete. 
+std::shared_ptr<GcsFileSystem> MakeGcsFileSystemForTest(const GcsOptions& options); Review comment: Does the TODO cover making a static method [Make](https://github.com/apache/arrow/blob/master/cpp/src/arrow/filesystem/s3fs.h#L281) on GcsFileSystem? ########## File path: cpp/src/arrow/filesystem/gcsfs_test.cc ########## @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/filesystem/gcsfs.h" + +#include <gmock/gmock-matchers.h> +#include <gmock/gmock-more-matchers.h> +#include <gtest/gtest.h> + +#include <string> + +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/util.h" + +namespace arrow { +namespace fs { +namespace { + +using ::testing::IsEmpty; +using ::testing::Not; +using ::testing::NotNull; + +TEST(GCSFileSystem, Compare) { + auto a = internal::MakeGcsFileSystemForTest(GcsOptions{}); + EXPECT_THAT(a.get(), NotNull()); + EXPECT_EQ(a, a); + + auto b = internal::MakeGcsFileSystemForTest(GcsOptions{}); + EXPECT_THAT(b.get(), NotNull()); + EXPECT_EQ(b, b); + + EXPECT_NE(a, b); Review comment: Yeah, I meant that the configuration parameters are equal. Before, this looked like it was using solely memory address equality. 
########## File path: cpp/CMakeLists.txt ########## @@ -801,7 +801,11 @@ endif() set(ARROW_SHARED_PRIVATE_LINK_LIBS ${ARROW_STATIC_LINK_LIBS}) # boost::filesystem is needed for S3 and Flight tests as a boost::process dependency. -if(((ARROW_FLIGHT OR ARROW_S3) AND (ARROW_BUILD_TESTS OR ARROW_BUILD_INTEGRATION))) +if(((ARROW_FLIGHT + OR ARROW_S3 Review comment: Not sure if the Mac build failure is a transient error or somehow due to this. ########## File path: cpp/CMakeLists.txt ########## @@ -801,7 +801,11 @@ endif() set(ARROW_SHARED_PRIVATE_LINK_LIBS ${ARROW_STATIC_LINK_LIBS}) # boost::filesystem is needed for S3 and Flight tests as a boost::process dependency. -if(((ARROW_FLIGHT OR ARROW_S3) AND (ARROW_BUILD_TESTS OR ARROW_BUILD_INTEGRATION))) +if(((ARROW_FLIGHT + OR ARROW_S3 Review comment: Yeah, let's try a rebase. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
