This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new ca23513  ARROW-4697: [C++] Add URI parsing facility
ca23513 is described below

commit ca2351363ba1724de17eda3dd8ef334d7231f4f8
Author: Antoine Pitrou <anto...@python.org>
AuthorDate: Wed Mar 20 09:23:27 2019 +0100

    ARROW-4697: [C++] Add URI parsing facility
    
    Using the [uriparser](https://uriparser.github.io/) C library.
    
    Author: Antoine Pitrou <anto...@python.org>
    Author: Uwe L. Korn <xho...@users.noreply.github.com>
    
    Closes #3779 from pitrou/ARROW-4697-cpp-uri-parsing and squashes the following commits:
    
    5ad5d1b6 <Uwe L. Korn> Set CMAKE_INSTALL_LIBDIR when building uriparser
    ff41cef7 <Antoine Pitrou> Fix AppVeyor failure?
    6348b896 <Antoine Pitrou> - Work around a CMake issue - Add docstrings - Add more path tests
    3f5c5899 <Antoine Pitrou> Explicitly bundle uriparser if conda was set as default source
    a1714902 <Antoine Pitrou> ARROW-4697:  Add URI parsing facility
---
 ci/appveyor-cpp-build.bat                   |   2 +-
 cpp/CMakeLists.txt                          |   3 +
 cpp/cmake_modules/ThirdpartyToolchain.cmake |  75 ++++++++++++
 cpp/src/arrow/CMakeLists.txt                |   1 +
 cpp/src/arrow/symbols.map                   |   2 +
 cpp/src/arrow/util/CMakeLists.txt           |   1 +
 cpp/src/arrow/util/uri-test.cc              | 182 ++++++++++++++++++++++++++++
 cpp/src/arrow/util/uri.cc                   | 147 ++++++++++++++++++++++
 cpp/src/arrow/util/uri.h                    |  67 ++++++++++
 cpp/thirdparty/versions.txt                 |   3 +
 10 files changed, 482 insertions(+), 1 deletion(-)

diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat
index b735d0f..0320db9 100644
--- a/ci/appveyor-cpp-build.bat
+++ b/ci/appveyor-cpp-build.bat
@@ -96,7 +96,7 @@ if "%JOB%" == "Build_Debug" (
   exit /B 0
 )
 
-set CONDA_PACKAGES=--file=ci\conda_env_python.yml python=%PYTHON% numpy=1.14 thrift-cpp=0.11 boost-cpp
+set CONDA_PACKAGES=--file=ci\conda_env_python.yml python=%PYTHON% numpy=1.14 boost-cpp
 
 if "%ARROW_BUILD_GANDIVA%" == "ON" (
   @rem Install llvmdev in the toolchain if building gandiva.dll
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 3886743..d80fe70 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -757,6 +757,9 @@ set(ARROW_LINK_LIBS ${double-conversion_LIBRARIES})
 set(ARROW_STATIC_LINK_LIBS ${double-conversion_LIBRARIES})
 set(ARROW_STATIC_INSTALL_INTERFACE_LIBS ${double-conversion_LIBRARIES})
 
+list(APPEND ARROW_STATIC_LINK_LIBS uriparser::uriparser)
+list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS uriparser::uriparser)
+
 if(ARROW_WITH_BROTLI)
   # Order is important for static linking
   list(APPEND ARROW_LINK_LIBS Brotli::brotlienc Brotli::brotlidec Brotli::brotlicommon)
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 29b6173..bdb8a98 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -133,6 +133,8 @@ macro(build_dependency DEPENDENCY_NAME)
     build_grpc()
   elseif("${DEPENDENCY_NAME}" STREQUAL "BZip2")
     build_bzip2()
+  elseif("${DEPENDENCY_NAME}" STREQUAL "uriparser")
+    build_uriparser()
   else()
     message(FATAL_ERROR "Unknown thirdparty dependency to build: ${DEPENDENCY_NAME}")
   endif()
@@ -341,6 +343,13 @@ else()
     )
 endif()
 
+if(DEFINED ENV{ARROW_URIPARSER_URL})
+  set(URIPARSER_SOURCE_URL "$ENV{ARROW_URIPARSER_URL}")
+else()
+  set(URIPARSER_SOURCE_URL
+      "https://github.com/uriparser/uriparser/archive/${URIPARSER_VERSION}.tar.gz")
+endif()
+
 if(DEFINED ENV{ARROW_ZLIB_URL})
   set(ZLIB_SOURCE_URL "$ENV{ARROW_ZLIB_URL}")
 else()
@@ -516,6 +525,72 @@ include_directories(SYSTEM ${double-conversion_INCLUDE_DIRS})
 double_conversion_compability()
 
 # ----------------------------------------------------------------------
+# uriparser library
+
+macro(build_uriparser)
+  message(STATUS "Building uriparser from source")
+  set(URIPARSER_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/uriparser_ep-install")
+  set(
+    URIPARSER_STATIC_LIB
+    "${URIPARSER_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}uriparser${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+  set(URIPARSER_INCLUDE_DIRS "${URIPARSER_PREFIX}/include")
+
+  set(URIPARSER_CMAKE_ARGS
+      ${EP_COMMON_CMAKE_ARGS}
+      "-DURIPARSER_BUILD_DOCS=off"
+      "-DURIPARSER_BUILD_TESTS=off"
+      "-DURIPARSER_BUILD_TOOLS=off"
+      "-DURIPARSER_BUILD_WCHAR_T=off"
+      "-DBUILD_SHARED_LIBS=off"
+      "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
+      "-DCMAKE_INSTALL_LIBDIR=lib"
+      "-DCMAKE_POSITION_INDEPENDENT_CODE=on"
+      "-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>")
+
+  if(MSVC AND ARROW_USE_STATIC_CRT)
+    if("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
+      list(APPEND URIPARSER_CMAKE_ARGS "-DURIPARSER_MSVC_RUNTIME=/MTd")
+    else()
+      list(APPEND URIPARSER_CMAKE_ARGS "-DURIPARSER_MSVC_RUNTIME=/MT")
+    endif()
+  endif()
+
+  externalproject_add(uriparser_ep
+                      URL
+                      ${URIPARSER_SOURCE_URL}
+                      CMAKE_ARGS
+                      ${URIPARSER_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS
+                      ${URIPARSER_STATIC_LIB}
+                      INSTALL_DIR
+                      ${URIPARSER_PREFIX}
+                      ${EP_LOG_OPTIONS})
+
+  add_library(uriparser::uriparser STATIC IMPORTED)
+  # Work around https://gitlab.kitware.com/cmake/cmake/issues/15052
+  file(MAKE_DIRECTORY ${URIPARSER_INCLUDE_DIRS})
+  set_target_properties(
+    uriparser::uriparser
+    PROPERTIES IMPORTED_LOCATION ${URIPARSER_STATIC_LIB} INTERFACE_INCLUDE_DIRECTORIES
+               ${URIPARSER_INCLUDE_DIRS})
+
+  add_dependencies(toolchain uriparser_ep)
+  add_dependencies(uriparser::uriparser uriparser_ep)
+endmacro()
+
+# Unless the user overrides uriparser_SOURCE, build uriparser ourselves
+if("${uriparser_SOURCE}" STREQUAL "")
+  set(uriparser_SOURCE "BUNDLED")
+endif()
+
+resolve_dependency(uriparser)
+
+get_target_property(URIPARSER_INCLUDE_DIRS uriparser::uriparser
+                    INTERFACE_INCLUDE_DIRECTORIES)
+include_directories(SYSTEM ${URIPARSER_INCLUDE_DIRS})
+
+# ----------------------------------------------------------------------
 # Snappy
 
 macro(build_snappy)
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 4ef60c9..83c2674 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -129,6 +129,7 @@ set(ARROW_SRCS
     util/task-group.cc
     util/thread-pool.cc
     util/trie.cc
+    util/uri.cc
     util/utf8.cc
     vendored/datetime/tz.cpp)
 
diff --git a/cpp/src/arrow/symbols.map b/cpp/src/arrow/symbols.map
index 9ee0ff3..9b24ab4 100644
--- a/cpp/src/arrow/symbols.map
+++ b/cpp/src/arrow/symbols.map
@@ -66,6 +66,8 @@
     ERR_getErrorString;
     # jemalloc
     je_arrow_*;
+    # uriparser
+    uri*;
     # ORC destructors
     _ZThn8_N3orc*;
     # Protobuf symbols that aren't hidden by the C++ section below
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index ba24f88..ca0b96e 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -41,6 +41,7 @@ add_arrow_test(stl-util-test)
 add_arrow_test(task-group-test)
 add_arrow_test(thread-pool-test)
 add_arrow_test(trie-test)
+add_arrow_test(uri-test)
 add_arrow_test(utf8-util-test)
 
 add_arrow_benchmark(bit-util-benchmark)
diff --git a/cpp/src/arrow/util/uri-test.cc b/cpp/src/arrow/util/uri-test.cc
new file mode 100644
index 0000000..34a7d24
--- /dev/null
+++ b/cpp/src/arrow/util/uri-test.cc
@@ -0,0 +1,182 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/uri.h"
+
+namespace arrow {
+namespace internal {
+
+TEST(Uri, Empty) {
+  Uri uri;
+  ASSERT_EQ(uri.scheme(), "");
+}
+
+TEST(Uri, ParseSimple) {
+  Uri uri;
+  {
+    // An ephemeral string object shouldn't invalidate results
+    std::string s = "https://arrow.apache.org";
+    ASSERT_OK(uri.Parse(s));
+    s.replace(0, s.size(), s.size(), 'X');  // replace contents
+  }
+  ASSERT_EQ(uri.scheme(), "https");
+  ASSERT_EQ(uri.host(), "arrow.apache.org");
+  ASSERT_EQ(uri.port_text(), "");
+}
+
+TEST(Uri, ParsePath) {
+  // The various edge cases below (leading and trailing slashes) have been
+  // checked against several Python URI parsing modules: `uri`, `rfc3986`, `rfc3987`
+
+  Uri uri;
+
+  // Relative path
+  ASSERT_OK(uri.Parse("unix:tmp/flight.sock"));
+  ASSERT_EQ(uri.scheme(), "unix");
+  ASSERT_FALSE(uri.has_host());
+  ASSERT_EQ(uri.host(), "");
+  ASSERT_EQ(uri.path(), "tmp/flight.sock");
+
+  // Absolute path
+  ASSERT_OK(uri.Parse("unix:/tmp/flight.sock"));
+  ASSERT_EQ(uri.scheme(), "unix");
+  ASSERT_FALSE(uri.has_host());
+  ASSERT_EQ(uri.host(), "");
+  ASSERT_EQ(uri.path(), "/tmp/flight.sock");
+
+  ASSERT_OK(uri.Parse("unix://localhost/tmp/flight.sock"));
+  ASSERT_EQ(uri.scheme(), "unix");
+  ASSERT_TRUE(uri.has_host());
+  ASSERT_EQ(uri.host(), "localhost");
+  ASSERT_EQ(uri.path(), "/tmp/flight.sock");
+
+  ASSERT_OK(uri.Parse("unix:///tmp/flight.sock"));
+  ASSERT_EQ(uri.scheme(), "unix");
+  ASSERT_TRUE(uri.has_host());
+  ASSERT_EQ(uri.host(), "");
+  ASSERT_EQ(uri.path(), "/tmp/flight.sock");
+
+  // Empty path
+  ASSERT_OK(uri.Parse("unix:"));
+  ASSERT_EQ(uri.scheme(), "unix");
+  ASSERT_FALSE(uri.has_host());
+  ASSERT_EQ(uri.host(), "");
+  ASSERT_EQ(uri.path(), "");
+
+  ASSERT_OK(uri.Parse("unix://localhost"));
+  ASSERT_EQ(uri.scheme(), "unix");
+  ASSERT_TRUE(uri.has_host());
+  ASSERT_EQ(uri.host(), "localhost");
+  ASSERT_EQ(uri.path(), "");
+
+  // With trailing slash
+  ASSERT_OK(uri.Parse("unix:/"));
+  ASSERT_EQ(uri.scheme(), "unix");
+  ASSERT_FALSE(uri.has_host());
+  ASSERT_EQ(uri.host(), "");
+  ASSERT_EQ(uri.path(), "/");
+
+  ASSERT_OK(uri.Parse("unix:tmp/"));
+  ASSERT_EQ(uri.scheme(), "unix");
+  ASSERT_FALSE(uri.has_host());
+  ASSERT_EQ(uri.host(), "");
+  ASSERT_EQ(uri.path(), "tmp/");
+
+  ASSERT_OK(uri.Parse("unix://localhost/"));
+  ASSERT_EQ(uri.scheme(), "unix");
+  ASSERT_TRUE(uri.has_host());
+  ASSERT_EQ(uri.host(), "localhost");
+  ASSERT_EQ(uri.path(), "/");
+
+  ASSERT_OK(uri.Parse("unix:/tmp/flight/"));
+  ASSERT_EQ(uri.scheme(), "unix");
+  ASSERT_FALSE(uri.has_host());
+  ASSERT_EQ(uri.host(), "");
+  ASSERT_EQ(uri.path(), "/tmp/flight/");
+
+  ASSERT_OK(uri.Parse("unix:///tmp/flight/"));
+  ASSERT_EQ(uri.scheme(), "unix");
+  ASSERT_TRUE(uri.has_host());
+  ASSERT_EQ(uri.host(), "");
+  ASSERT_EQ(uri.path(), "/tmp/flight/");
+}
+
+TEST(Uri, ParseHostPort) {
+  Uri uri;
+
+  ASSERT_OK(uri.Parse("http://localhost:80"));
+  ASSERT_EQ(uri.scheme(), "http");
+  ASSERT_EQ(uri.host(), "localhost");
+  ASSERT_EQ(uri.port_text(), "80");
+  ASSERT_EQ(uri.port(), 80);
+
+  ASSERT_OK(uri.Parse("http://1.2.3.4"));
+  ASSERT_EQ(uri.scheme(), "http");
+  ASSERT_EQ(uri.host(), "1.2.3.4");
+  ASSERT_EQ(uri.port_text(), "");
+  ASSERT_EQ(uri.port(), -1);
+
+  ASSERT_OK(uri.Parse("http://1.2.3.4:"));
+  ASSERT_EQ(uri.scheme(), "http");
+  ASSERT_EQ(uri.host(), "1.2.3.4");
+  ASSERT_EQ(uri.port_text(), "");
+  ASSERT_EQ(uri.port(), -1);
+
+  ASSERT_OK(uri.Parse("http://1.2.3.4:80"));
+  ASSERT_EQ(uri.scheme(), "http");
+  ASSERT_EQ(uri.host(), "1.2.3.4");
+  ASSERT_EQ(uri.port_text(), "80");
+  ASSERT_EQ(uri.port(), 80);
+
+  ASSERT_OK(uri.Parse("http://[::1]"));
+  ASSERT_EQ(uri.scheme(), "http");
+  ASSERT_EQ(uri.host(), "::1");
+  ASSERT_EQ(uri.port_text(), "");
+  ASSERT_EQ(uri.port(), -1);
+
+  ASSERT_OK(uri.Parse("http://[::1]:"));
+  ASSERT_EQ(uri.scheme(), "http");
+  ASSERT_EQ(uri.host(), "::1");
+  ASSERT_EQ(uri.port_text(), "");
+  ASSERT_EQ(uri.port(), -1);
+
+  ASSERT_OK(uri.Parse("http://[::1]:80"));
+  ASSERT_EQ(uri.scheme(), "http");
+  ASSERT_EQ(uri.host(), "::1");
+  ASSERT_EQ(uri.port_text(), "80");
+  ASSERT_EQ(uri.port(), 80);
+}
+
+TEST(Uri, ParseError) {
+  Uri uri;
+
+  ASSERT_RAISES(Invalid, uri.Parse("http://a:b:c:d"));
+  ASSERT_RAISES(Invalid, uri.Parse("http://localhost:z"));
+  ASSERT_RAISES(Invalid, uri.Parse("http://localhost:-1"));
+  ASSERT_RAISES(Invalid, uri.Parse("http://localhost:99999"));
+}
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/uri.cc b/cpp/src/arrow/util/uri.cc
new file mode 100644
index 0000000..3a90612
--- /dev/null
+++ b/cpp/src/arrow/util/uri.cc
@@ -0,0 +1,147 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/uri.h"
+
+#include <cstring>
+#include <sstream>
+#include <vector>
+
+#include <uriparser/Uri.h>
+
+#include "arrow/util/parsing.h"
+#include "arrow/util/string_view.h"
+
+namespace arrow {
+namespace internal {
+
+namespace {
+
+util::string_view TextRangeToView(const UriTextRangeStructA& range) {
+  if (range.first == nullptr) {
+    return "";
+  } else {
+    return {range.first, static_cast<size_t>(range.afterLast - range.first)};
+  }
+}
+
+std::string TextRangeToString(const UriTextRangeStructA& range) {
+  return std::string(TextRangeToView(range));
+}
+
+// There can be a difference between an absent field and an empty field.
+// For example, in "unix:/tmp/foo", the host is absent, while in
+// "unix:///tmp/foo", the host is empty but present.
+// This function helps distinguish.
+bool IsTextRangeSet(const UriTextRangeStructA& range) { return range.first != nullptr; }
+
+}  // namespace
+
+struct Uri::Impl {
+  Impl() : port_(-1) { memset(&uri_, 0, sizeof(uri_)); }
+
+  ~Impl() { uriFreeUriMembersA(&uri_); }
+
+  void Reset() {
+    uriFreeUriMembersA(&uri_);
+    memset(&uri_, 0, sizeof(uri_));
+    data_.clear();
+    port_ = -1;
+  }
+
+  const std::string& KeepString(const std::string& s) {
+    data_.push_back(s);
+    return data_.back();
+  }
+
+  UriUriA uri_;
+  // Keep alive strings that uriparser stores pointers to
+  std::vector<std::string> data_;
+  int32_t port_;
+};
+
+Uri::Uri() : impl_(new Impl) {}
+
+Uri::~Uri() {}
+
+std::string Uri::scheme() const { return TextRangeToString(impl_->uri_.scheme); }
+
+std::string Uri::host() const { return TextRangeToString(impl_->uri_.hostText); }
+
+bool Uri::has_host() const { return IsTextRangeSet(impl_->uri_.hostText); }
+
+std::string Uri::port_text() const { return TextRangeToString(impl_->uri_.portText); }
+
+int32_t Uri::port() const { return impl_->port_; }
+
+std::string Uri::path() const {
+  // Gather path segments
+  std::vector<util::string_view> segments;
+  auto path_seg = impl_->uri_.pathHead;
+  while (path_seg != nullptr) {
+    segments.push_back(TextRangeToView(path_seg->text));
+    path_seg = path_seg->next;
+  }
+
+  std::stringstream ss;
+  if (impl_->uri_.absolutePath == URI_TRUE) {
+    ss << "/";
+  } else if (has_host() && segments.size() > 0) {
+    // When there's a host (even empty), uriparser considers the path relative.
+    // Several URI parsers for Python all consider it absolute, though.
+    // For example, the path for "file:///tmp/foo" is "/tmp/foo", not "tmp/foo".
+    // Similarly, the path for "file://localhost/" is "/".
+    // However, the path for "file://localhost" is "".
+    ss << "/";
+  }
+  bool first = true;
+  for (const auto seg : segments) {
+    if (!first) {
+      ss << "/";
+    }
+    first = false;
+    ss << seg;
+  }
+  return ss.str();
+}
+
+Status Uri::Parse(const std::string& uri_string) {
+  impl_->Reset();
+
+  const auto& s = impl_->KeepString(uri_string);
+  const char* error_pos;
+  if (uriParseSingleUriExA(&impl_->uri_, s.data(), s.data() + s.size(), &error_pos) !=
+      URI_SUCCESS) {
+    return Status::Invalid("Cannot parse URI: '", uri_string, "'");
+  }
+  // Parse port number
+  auto port_text = TextRangeToView(impl_->uri_.portText);
+  if (port_text.size()) {
+    StringConverter<UInt16Type> port_converter;
+    uint16_t port_num;
+    if (!port_converter(port_text.data(), port_text.size(), &port_num)) {
+      return Status::Invalid("Invalid port number '", port_text, "' in URI '", uri_string,
+                             "'");
+    }
+    impl_->port_ = port_num;
+  }
+
+  return Status::OK();
+}
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/uri.h b/cpp/src/arrow/util/uri.h
new file mode 100644
index 0000000..3d69495
--- /dev/null
+++ b/cpp/src/arrow/util/uri.h
@@ -0,0 +1,67 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "arrow/status.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace internal {
+
+/// \brief A parsed URI
+class ARROW_EXPORT Uri {
+ public:
+  Uri();
+  ~Uri();
+
+  // XXX Should we use util::string_view instead?  These functions are
+  // not performance-critical.
+
+  /// The URI scheme, such as "http", or the empty string if the URI has no
+  /// explicit scheme.
+  std::string scheme() const;
+  /// Whether the URI has an explicit host name.  This may return true if
+  /// the URI has an empty host (e.g. "file:///tmp/foo"), while it returns
+  /// false is the URI has not host component at all (e.g. "file:/tmp/foo").
+  bool has_host() const;
+  /// The URI host name, such as "localhost", "127.0.0.1" or "::1", or the empty
+  /// string is the URI does not have a host component.
+  std::string host() const;
+  /// The URI port number, as a string such as "80", or the empty string is the URI
+  /// does not have a port number component.
+  std::string port_text() const;
+  /// The URI port parsed as an integer, or -1 if the URI does not have a port
+  /// number component.
+  int32_t port() const;
+  /// The URI path component.
+  std::string path() const;
+
+  /// Factory function to parse a URI from its string representation.
+  Status Parse(const std::string& uri_string);
+
+ private:
+  struct Impl;
+  std::unique_ptr<Impl> impl_;
+};
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index e7ad54c..737f062 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -45,6 +45,8 @@ RAPIDJSON_VERSION=2bbd33b33217ff4a73434ebf10cdac41e2ef5e34
 RE2_VERSION=2018-10-01
 SNAPPY_VERSION=1.1.3
 THRIFT_VERSION=0.12.0
+# CMake support appeared after latest release (0.9.1)
+URIPARSER_VERSION=63384be4fb8197264c55ff53a135110ecd5bd8c4
 ZLIB_VERSION=1.2.8
 ZSTD_VERSION=v1.3.7
 
@@ -70,6 +72,7 @@ DEPENDENCIES=(
   "ARROW_RE2_URL re2-${RE2_VERSION}.tar.gz https://github.com/google/re2/archive/${RE2_VERSION}.tar.gz"
   "ARROW_SNAPPY_URL snappy-${SNAPPY_VERSION}.tar.gz https://github.com/google/snappy/releases/download/${SNAPPY_VERSION}/snappy-${SNAPPY_VERSION}.tar.gz"
   "ARROW_THRIFT_URL thrift-${THRIFT_VERSION}.tar.gz http://archive.apache.org/dist/thrift/${THRIFT_VERSION}/thrift-${THRIFT_VERSION}.tar.gz"
+  "ARROW_URIPARSER_URL uriparser-${URIPARSER_VERSION}.tar.gz https://github.com/uriparser/uriparser/archive/${URIPARSER_VERSION}.tar.gz"
   "ARROW_ZLIB_URL zlib-${ZLIB_VERSION}.tar.gz http://zlib.net/fossils/zlib-${ZLIB_VERSION}.tar.gz"
   "ARROW_ZSTD_URL zstd-${ZSTD_VERSION}.tar.gz https://github.com/facebook/zstd/archive/${ZSTD_VERSION}.tar.gz"
 )

Reply via email to