kou commented on code in PR #12914:
URL: https://github.com/apache/arrow/pull/12914#discussion_r898408480


##########
ci/docker/ubuntu-22.04-cpp.dockerfile:
##########
@@ -150,7 +150,8 @@ RUN /arrow/ci/scripts/install_gcs_testbench.sh default
 # - flatbuffer is not packaged
 # - libgtest-dev only provide sources
 # - libprotobuf-dev only provide sources
-ENV ARROW_BUILD_TESTS=ON \
+ENV ARROW_AZURE: ON \

Review Comment:
   ```suggestion
   ENV ARROW_AZURE=ON \
   ```



##########
cpp/src/arrow/filesystem/azurefs_mock.cc:
##########
@@ -0,0 +1,1152 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs_mock.h"
+
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <map>
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/filesystem/mockfs.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/future.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/variant.h"
+#include "arrow/util/windows_fixup.h"
+
+namespace arrow {
+namespace fs {
+namespace internal {
+
+namespace {
+
+Status ValidatePath(util::string_view s) {
+  if (internal::IsLikelyUri(s)) {
+    return Status::Invalid("Expected a filesystem path, got a URI: '", s, "'");
+  }
+  return Status::OK();
+}
+
+////////////////////////////////////////////////////////////////////////////
+// Filesystem structure
+
+class Entry;
+
+struct AzurePath {
+  std::string full_path;
+  std::string container;
+  std::string path_to_file;
+  std::vector<std::string> path_to_file_parts;
+
+  static Result<AzurePath> FromString(const std::string& s) {
+    // 
https://synapsemladlsgen2.dfs.core.windows.net/synapsemlfs/testdir/testfile.txt
+    // container = synapsemlfs
+    // account_name = synapsemladlsgen2
+    // path_to_file = testdir/testfile.txt
+    // path_to_file_parts = [testdir, testfile.txt]
+
+    // Expected input here => s = /synapsemlfs/testdir/testfile.txt
+    auto src = internal::RemoveTrailingSlash(s);
+    if (src.starts_with("https:") || src.starts_with("http::")) {

Review Comment:
   How about using `internal::IsLikelyUri()`?



##########
cpp/src/arrow/filesystem/CMakeLists.txt:
##########
@@ -37,6 +37,10 @@ if(ARROW_GCS)
                  Boost::system)
 endif()
 
+if(ARROW_AZURE)
+  add_arrow_test(azurefs_test EXTRA_LABELS filesystem EXTRA_LINK_LIBS)

Review Comment:
   ```suggestion
     add_arrow_test(azurefs_test EXTRA_LABELS filesystem)
   ```



##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -4553,6 +4594,152 @@ if(ARROW_S3)
   endif()
 endif()
 
+macro(build_azuresdk)
+  message(STATUS "Building Azure C++ SDK from source")
+
+  find_curl()
+  find_package(OpenSSL ${ARROW_OPENSSL_REQUIRED_VERSION} REQUIRED)
+
+  set(AZURESDK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/azuresdk_ep-install")
+  set(AZURESDK_INCLUDE_DIR "${AZURESDK_PREFIX}/include")
+  set(AZURESDK_LIB_DIR "lib")
+
+  # provide hint for Azure SDK to link with the already located openssl
+  get_filename_component(OPENSSL_ROOT_HINT "${OPENSSL_INCLUDE_DIR}" DIRECTORY)
+
+  set(AZURESDK_COMMON_CMAKE_ARGS
+      ${EP_COMMON_CMAKE_ARGS}
+      -DBUILD_SHARED_LIBS=OFF
+      -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}

Review Comment:
   We can omit this because `EP_COMMON_CMAKE_ARGS` includes it.
   
   ```suggestion
   ```



##########
ci/docker/ubuntu-18.04-cpp.dockerfile:
##########
@@ -98,7 +98,8 @@ RUN apt-get update -y -q && \
 # - thrift is too old
 # - utf8proc is too old(v2.1.0)
 # - s3 tests would require boost-asio that is included since Boost 1.66.0
-ENV ARROW_BUILD_TESTS=ON \
+ENV ARROW_AZURE: ON \

Review Comment:
   ```suggestion
   ENV ARROW_AZURE=ON \
   ```



##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -4553,6 +4594,152 @@ if(ARROW_S3)
   endif()
 endif()
 
+macro(build_azuresdk)
+  message(STATUS "Building Azure C++ SDK from source")
+
+  find_curl()
+  find_package(OpenSSL ${ARROW_OPENSSL_REQUIRED_VERSION} REQUIRED)
+
+  set(AZURESDK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/azuresdk_ep-install")
+  set(AZURESDK_INCLUDE_DIR "${AZURESDK_PREFIX}/include")
+  set(AZURESDK_LIB_DIR "lib")
+
+  # provide hint for Azure SDK to link with the already located openssl
+  get_filename_component(OPENSSL_ROOT_HINT "${OPENSSL_INCLUDE_DIR}" DIRECTORY)
+
+  set(AZURESDK_COMMON_CMAKE_ARGS
+      ${EP_COMMON_CMAKE_ARGS}
+      -DBUILD_SHARED_LIBS=OFF
+      -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+      -DCMAKE_INSTALL_LIBDIR=${AZURESDK_LIB_DIR}
+      -DENABLE_TESTING=OFF
+      -DENABLE_UNITY_BUILD=ON
+      "-DCMAKE_INSTALL_PREFIX=${AZURESDK_PREFIX}"
+      "-DCMAKE_PREFIX_PATH=${AZURESDK_PREFIX}"
+      -DWARNINGS_AS_ERRORS=OFF
+      -DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_HINT})
+
+  file(MAKE_DIRECTORY ${AZURESDK_INCLUDE_DIR})
+
+  set(AZURE_CORE_STATIC_LIBRARY
+      
"${AZURESDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-core${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
+  externalproject_add(azure_core_ep
+                      ${EP_LOG_OPTIONS}
+                      INSTALL_DIR ${AZURESDK_PREFIX}
+                      URL ${AZURE_CORE_SOURCE_URL}
+                      URL_HASH 
"SHA256=${ARROW_AZURE_CORE_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_COMMON_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_CORE_STATIC_LIBRARY})
+  add_library(Azure::azure-core STATIC IMPORTED)
+  set_target_properties(Azure::azure-core
+                        PROPERTIES IMPORTED_LOCATION 
"${AZURE_CORE_STATIC_LIBRARY}"
+                                   INTERFACE_INCLUDE_DIRECTORIES
+                                   "${AZURESDK_INCLUDE_DIR}")
+  target_link_libraries(Azure::azure-core INTERFACE LibXml2::LibXml2)
+  add_dependencies(Azure::azure-core azure_core_ep)
+
+  set(AZURE_IDENTITY_STATIC_LIBRARY
+      
"${AZURESDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-identity${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
+  externalproject_add(azure_identity_ep
+                      ${EP_LOG_OPTIONS}
+                      INSTALL_DIR ${AZURESDK_PREFIX}
+                      URL ${AZURE_IDENTITY_SOURCE_URL}
+                      URL_HASH 
"SHA256=${ARROW_AZURE_IDENTITY_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_COMMON_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_IDENTITY_STATIC_LIBRARY})
+  add_library(Azure::azure-identity STATIC IMPORTED)
+  set_target_properties(Azure::azure-identity
+                        PROPERTIES IMPORTED_LOCATION 
"${AZURE_IDENTITY_STATIC_LIBRARY}"
+                                   INTERFACE_INCLUDE_DIRECTORIES
+                                   "${AZURESDK_INCLUDE_DIR}")
+  target_link_libraries(Azure::azure-identity INTERFACE LibXml2::LibXml2)
+  add_dependencies(Azure::azure-identity azure_identity_ep)
+
+  set(AZURE_STORAGE_BLOBS_STATIC_LIBRARY
+      
"${AZURESDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-storage-blobs${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
+  externalproject_add(azure_storage_blobs_ep
+                      ${EP_LOG_OPTIONS}
+                      INSTALL_DIR ${AZURESDK_PREFIX}
+                      URL ${AZURE_STORAGE_BLOB_SOURCE_URL}
+                      URL_HASH 
"SHA256=${ARROW_AZURE_STORAGE_BLOB_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_COMMON_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_STORAGE_BLOBS_STATIC_LIBRARY})
+  add_library(Azure::azure-storage-blobs STATIC IMPORTED)
+  set_target_properties(Azure::azure-storage-blobs
+                        PROPERTIES IMPORTED_LOCATION
+                                   "${AZURE_STORAGE_BLOBS_STATIC_LIBRARY}"
+                                   INTERFACE_INCLUDE_DIRECTORIES
+                                   "${AZURESDK_INCLUDE_DIR}")
+  target_link_libraries(Azure::azure-storage-blobs INTERFACE LibXml2::LibXml2)
+  add_dependencies(Azure::azure-storage-blobs azure_storage_blobs_ep)
+
+  set(AZURE_STORAGE_COMMON_STATIC_LIBRARY
+      
"${AZURESDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-storage-common${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
+  externalproject_add(azure_storage_common_ep
+                      ${EP_LOG_OPTIONS}
+                      INSTALL_DIR ${AZURESDK_PREFIX}
+                      URL ${AZURE_STORAGE_COMMON_SOURCE_URL}
+                      URL_HASH 
"SHA256=${ARROW_AZURE_STORAGE_COMMON_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_COMMON_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_STORAGE_COMMON_STATIC_LIBRARY})
+  add_library(Azure::azure-storage-common STATIC IMPORTED)
+  set_target_properties(Azure::azure-storage-common
+                        PROPERTIES IMPORTED_LOCATION
+                                   "${AZURE_STORAGE_COMMON_STATIC_LIBRARY}"
+                                   INTERFACE_INCLUDE_DIRECTORIES
+                                   "${AZURESDK_INCLUDE_DIR}")
+  target_link_libraries(Azure::azure-storage-common INTERFACE LibXml2::LibXml2)
+  add_dependencies(Azure::azure-storage-common azure_storage_common_ep)
+
+  set(AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY
+      
"${AZURESDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-storage-files-datalake${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
+  externalproject_add(azure_storage_files_datalake_ep
+                      ${EP_LOG_OPTIONS}
+                      INSTALL_DIR ${AZURESDK_PREFIX}
+                      URL ${AZURE_STORAGE_FILES_DATALAKE_SOURCE_URL}
+                      URL_HASH 
"SHA256=${ARROW_AZURE_STORAGE_FILES_DATALAKE_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_COMMON_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS 
${AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY})
+  add_library(Azure::azure-storage-files-datalake STATIC IMPORTED)
+  set_target_properties(Azure::azure-storage-files-datalake
+                        PROPERTIES IMPORTED_LOCATION
+                                   
"${AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY}"
+                                   INTERFACE_INCLUDE_DIRECTORIES
+                                   "${AZURESDK_INCLUDE_DIR}")
+  target_link_libraries(Azure::azure-storage-files-datalake INTERFACE 
LibXml2::LibXml2)
+  add_dependencies(Azure::azure-storage-files-datalake 
azure_storage_files_datalake_ep)
+
+  set(AZURESDK_LIBRARIES)
+  list(APPEND
+       AZURESDK_LIBRARIES
+       Azure::azure-core
+       Azure::azure-storage-blobs
+       Azure::azure-identity
+       Azure::azure-storage-common
+       Azure::azure-storage-files-datalake)
+  list(APPEND
+       ARROW_BUNDLED_STATIC_LIBS
+       Azure::azure-core
+       Azure::azure-storage-blobs
+       Azure::azure-identity
+       Azure::azure-storage-common
+       Azure::azure-storage-files-datalake)
+
+  set(AZURESDK_LINK_LIBRARIES ${AZURESDK_LIBRARIES})
+endmacro()
+
+if(ARROW_AZURE)
+  find_package(LibXml2 REQUIRED)

Review Comment:
   Could you move this to `build_azuresdk()` like we did for curl and OpenSSL?



##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -4553,6 +4594,152 @@ if(ARROW_S3)
   endif()
 endif()
 
+macro(build_azuresdk)
+  message(STATUS "Building Azure C++ SDK from source")
+
+  find_curl()
+  find_package(OpenSSL ${ARROW_OPENSSL_REQUIRED_VERSION} REQUIRED)
+
+  set(AZURESDK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/azuresdk_ep-install")
+  set(AZURESDK_INCLUDE_DIR "${AZURESDK_PREFIX}/include")
+  set(AZURESDK_LIB_DIR "lib")
+
+  # provide hint for Azure SDK to link with the already located openssl
+  get_filename_component(OPENSSL_ROOT_HINT "${OPENSSL_INCLUDE_DIR}" DIRECTORY)
+
+  set(AZURESDK_COMMON_CMAKE_ARGS
+      ${EP_COMMON_CMAKE_ARGS}
+      -DBUILD_SHARED_LIBS=OFF
+      -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+      -DCMAKE_INSTALL_LIBDIR=${AZURESDK_LIB_DIR}
+      -DENABLE_TESTING=OFF
+      -DENABLE_UNITY_BUILD=ON
+      "-DCMAKE_INSTALL_PREFIX=${AZURESDK_PREFIX}"
+      "-DCMAKE_PREFIX_PATH=${AZURESDK_PREFIX}"
+      -DWARNINGS_AS_ERRORS=OFF
+      -DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_HINT})

Review Comment:
   Could you sort this list in alphabetical order?



##########
ci/docker/ubuntu-20.04-cpp.dockerfile:
##########
@@ -122,7 +122,8 @@ RUN /arrow/ci/scripts/install_ceph.sh
 # - flatbuffer is not packaged
 # - libgtest-dev only provide sources
 # - libprotobuf-dev only provide sources
-ENV ARROW_BUILD_TESTS=ON \
+ENV ARROW_AZURE: ON \

Review Comment:
   ```suggestion
   ENV ARROW_AZURE=ON \
   ```



##########
cpp/src/arrow/filesystem/azurefs_mock.cc:
##########
@@ -0,0 +1,1152 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs_mock.h"
+
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <map>
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/filesystem/mockfs.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/future.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/variant.h"
+#include "arrow/util/windows_fixup.h"
+
+namespace arrow {
+namespace fs {
+namespace internal {
+
+namespace {
+
+Status ValidatePath(util::string_view s) {
+  if (internal::IsLikelyUri(s)) {
+    return Status::Invalid("Expected a filesystem path, got a URI: '", s, "'");
+  }
+  return Status::OK();
+}
+
+////////////////////////////////////////////////////////////////////////////
+// Filesystem structure
+
+class Entry;
+
+struct AzurePath {
+  std::string full_path;
+  std::string container;
+  std::string path_to_file;
+  std::vector<std::string> path_to_file_parts;
+
+  static Result<AzurePath> FromString(const std::string& s) {
+    // 
https://synapsemladlsgen2.dfs.core.windows.net/synapsemlfs/testdir/testfile.txt
+    // container = synapsemlfs
+    // account_name = synapsemladlsgen2
+    // path_to_file = testdir/testfile.txt
+    // path_to_file_parts = [testdir, testfile.txt]
+
+    // Expected input here => s = /synapsemlfs/testdir/testfile.txt

Review Comment:
   Really? It seems that the leading `/` isn't allowed.



##########
cpp/src/arrow/filesystem/azurefs_mock.cc:
##########
@@ -0,0 +1,1152 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs_mock.h"
+
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <map>
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/filesystem/mockfs.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/future.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/variant.h"
+#include "arrow/util/windows_fixup.h"
+
+namespace arrow {
+namespace fs {
+namespace internal {
+
+namespace {
+
+Status ValidatePath(util::string_view s) {
+  if (internal::IsLikelyUri(s)) {
+    return Status::Invalid("Expected a filesystem path, got a URI: '", s, "'");
+  }
+  return Status::OK();
+}
+
+////////////////////////////////////////////////////////////////////////////
+// Filesystem structure
+
+class Entry;
+
+struct AzurePath {
+  std::string full_path;
+  std::string container;
+  std::string path_to_file;
+  std::vector<std::string> path_to_file_parts;
+
+  static Result<AzurePath> FromString(const std::string& s) {
+    // 
https://synapsemladlsgen2.dfs.core.windows.net/synapsemlfs/testdir/testfile.txt
+    // container = synapsemlfs
+    // account_name = synapsemladlsgen2
+    // path_to_file = testdir/testfile.txt
+    // path_to_file_parts = [testdir, testfile.txt]
+
+    // Expected input here => s = /synapsemlfs/testdir/testfile.txt
+    auto src = internal::RemoveTrailingSlash(s);
+    if (src.starts_with("https:") || src.starts_with("http::")) {
+      RemoveSchemeFromUri(src);
+    }
+    auto first_sep = src.find_first_of(kSep);
+    if (first_sep == 0) {
+      return Status::Invalid("Path cannot start with a separator ('", s, "')");
+    }
+    if (first_sep == std::string::npos) {
+      return AzurePath{std::string(src), std::string(src), "", {}};
+    }
+    AzurePath path;
+    path.full_path = std::string(src);
+    path.container = std::string(src.substr(0, first_sep));
+    path.path_to_file = std::string(src.substr(first_sep + 1));
+    path.path_to_file_parts = internal::SplitAbstractPath(path.path_to_file);
+    RETURN_NOT_OK(Validate(&path));
+    return path;
+  }
+
+  static void RemoveSchemeFromUri(nonstd::sv_lite::string_view& s) {

Review Comment:
   It seems that this removes not only the scheme (`http://` or `https://`) but 
also the host, the port, and the leading `/` in the path.
   
   Can we use a more suitable name for this?
   
   Could you use `util::string_view` instead of `nonstd::sv_lite::string_view`? 



##########
cpp/src/arrow/filesystem/azurefs_mock.cc:
##########
@@ -0,0 +1,1152 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs_mock.h"
+
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <map>
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/filesystem/mockfs.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/future.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/variant.h"
+#include "arrow/util/windows_fixup.h"
+
+namespace arrow {
+namespace fs {
+namespace internal {
+
+namespace {
+
+Status ValidatePath(util::string_view s) {
+  if (internal::IsLikelyUri(s)) {
+    return Status::Invalid("Expected a filesystem path, got a URI: '", s, "'");
+  }
+  return Status::OK();
+}
+
+////////////////////////////////////////////////////////////////////////////
+// Filesystem structure
+
+class Entry;
+
+struct AzurePath {
+  std::string full_path;
+  std::string container;
+  std::string path_to_file;
+  std::vector<std::string> path_to_file_parts;
+
+  static Result<AzurePath> FromString(const std::string& s) {
+    // 
https://synapsemladlsgen2.dfs.core.windows.net/synapsemlfs/testdir/testfile.txt
+    // container = synapsemlfs
+    // account_name = synapsemladlsgen2
+    // path_to_file = testdir/testfile.txt
+    // path_to_file_parts = [testdir, testfile.txt]
+
+    // Expected input here => s = /synapsemlfs/testdir/testfile.txt
+    auto src = internal::RemoveTrailingSlash(s);
+    if (src.starts_with("https:") || src.starts_with("http::")) {
+      RemoveSchemeFromUri(src);
+    }
+    auto first_sep = src.find_first_of(kSep);
+    if (first_sep == 0) {
+      return Status::Invalid("Path cannot start with a separator ('", s, "')");
+    }
+    if (first_sep == std::string::npos) {
+      return AzurePath{std::string(src), std::string(src), "", {}};
+    }
+    AzurePath path;
+    path.full_path = std::string(src);
+    path.container = std::string(src.substr(0, first_sep));
+    path.path_to_file = std::string(src.substr(first_sep + 1));
+    path.path_to_file_parts = internal::SplitAbstractPath(path.path_to_file);
+    RETURN_NOT_OK(Validate(&path));
+    return path;
+  }
+
+  static void RemoveSchemeFromUri(nonstd::sv_lite::string_view& s) {
+    auto first = s.find(".core.windows.net");
+    s = s.substr(first + 18, s.length());

Review Comment:
   How about using `internal::Uri` to extract the path part?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to