kou commented on code in PR #38505:
URL: https://github.com/apache/arrow/pull/38505#discussion_r1387402198
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -453,27 +453,136 @@ class ObjectInputFile final : public io::RandomAccessFile {
class AzureFileSystem::Impl {
public:
io::IOContext io_context_;
- std::shared_ptr<Azure::Storage::Blobs::BlobServiceClient> service_client_;
+ std::unique_ptr<Azure::Storage::Files::DataLake::DataLakeServiceClient>
+ datalake_service_client_;
+ std::unique_ptr<Azure::Storage::Blobs::BlobServiceClient>
blob_service_client_;
AzureOptions options_;
+ internal::HierarchicalNamespaceDetector hierarchical_namespace_;
explicit Impl(AzureOptions options, io::IOContext io_context)
: io_context_(io_context), options_(std::move(options)) {}
Status Init() {
- service_client_ =
std::make_shared<Azure::Storage::Blobs::BlobServiceClient>(
+ blob_service_client_ =
std::make_unique<Azure::Storage::Blobs::BlobServiceClient>(
options_.account_blob_url, options_.storage_credentials_provider);
+ datalake_service_client_ =
+
std::make_unique<Azure::Storage::Files::DataLake::DataLakeServiceClient>(
+ options_.account_dfs_url, options_.storage_credentials_provider);
+
RETURN_NOT_OK(hierarchical_namespace_.Init(datalake_service_client_.get()));
return Status::OK();
}
const AzureOptions& options() const { return options_; }
+ public:
+ Result<FileInfo> GetFileInfo(const AzurePath& path) {
+ FileInfo info;
+ info.set_path(path.full_path);
+
+ if (path.container.empty()) {
+ DCHECK(path.path_to_file.empty()); // The path is invalid if the container is empty
+ // but not path_to_file.
+ // path must refer to the root of the Azure storage account. This is a directory,
+ // and there isn't any extra metadata to fetch.
+ info.set_type(FileType::Directory);
+ return info;
+ }
+ if (path.path_to_file.empty()) {
+ // path refers to a container. This is a directory if it exists.
+ auto container_client =
+ blob_service_client_->GetBlobContainerClient(path.container);
+ try {
+ auto properties = container_client.GetProperties();
+ info.set_type(FileType::Directory);
+ info.set_mtime(
+
std::chrono::system_clock::time_point(properties.Value.LastModified));
+ return info;
+ } catch (const Azure::Storage::StorageException& exception) {
+ if (exception.StatusCode ==
Azure::Core::Http::HttpStatusCode::NotFound) {
+ info.set_type(FileType::NotFound);
+ return info;
+ }
+ return internal::ExceptionToStatus(
+ "GetProperties for '" + container_client.GetUrl() +
+ "' failed with an unexpected Azure error. GetFileInfo is
unable to "
+ "determine whether the container exists.",
+ exception);
+ }
+ }
+ auto file_client =
datalake_service_client_->GetFileSystemClient(path.container)
+ .GetFileClient(path.path_to_file);
+ try {
+ auto properties = file_client.GetProperties();
+ if (properties.Value.IsDirectory) {
+ info.set_type(FileType::Directory);
+ } else if (internal::HasTrailingSlash(path.path_to_file)) {
+ // For a path with a trailing slash a hierarchical namespace may return a blob
+ // with that trailing slash removed. For consistency with flat namespace and
+ // other filesystems we chose to return NotFound.
+ info.set_type(FileType::NotFound);
+ return info;
+ } else {
+ info.set_type(FileType::File);
+ info.set_size(properties.Value.FileSize);
+ }
+ info.set_mtime(
+
std::chrono::system_clock::time_point(properties.Value.LastModified));
+ return info;
+ } catch (const Azure::Storage::StorageException& exception) {
+ if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound)
{
+ ARROW_ASSIGN_OR_RAISE(bool hierarchical_namespace_enabled,
Review Comment:
```suggestion
ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled,
```
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -453,27 +457,137 @@ class ObjectInputFile final : public io::RandomAccessFile {
class AzureFileSystem::Impl {
public:
io::IOContext io_context_;
- std::shared_ptr<Azure::Storage::Blobs::BlobServiceClient> service_client_;
+ std::shared_ptr<Azure::Storage::Files::DataLake::DataLakeServiceClient>
+ datalake_service_client_;
+ std::unique_ptr<Azure::Storage::Blobs::BlobServiceClient>
blob_service_client_;
AzureOptions options_;
+ internal::HierarchicalNamespaceDetector hierarchical_namespace_;
Review Comment:
OK. Let's use this approach.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]