This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 68fe3813 feat: add simple url encoder & decoder (#457)
68fe3813 is described below

commit 68fe381366338a5d86b13dbd611a0b3f10212905
Author: wzhuo <[email protected]>
AuthorDate: Mon Jan 5 20:48:42 2026 +0800

    feat: add simple url encoder & decoder (#457)
---
 src/iceberg/CMakeLists.txt           |  1 +
 src/iceberg/meson.build              |  1 +
 src/iceberg/test/CMakeLists.txt      |  1 +
 src/iceberg/test/meson.build         |  1 +
 src/iceberg/test/url_encoder_test.cc | 83 +++++++++++++++++++++++++++++++++++
 src/iceberg/util/meson.build         |  1 +
 src/iceberg/util/url_encoder.cc      | 84 ++++++++++++++++++++++++++++++++++++
 src/iceberg/util/url_encoder.h       | 53 +++++++++++++++++++++++
 8 files changed, 225 insertions(+)

diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt
index 617ec05b..2ecd652f 100644
--- a/src/iceberg/CMakeLists.txt
+++ b/src/iceberg/CMakeLists.txt
@@ -97,6 +97,7 @@ set(ICEBERG_SOURCES
     util/timepoint.cc
     util/truncate_util.cc
     util/type_util.cc
+    util/url_encoder.cc
     util/uuid.cc)
 
 set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)
diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build
index 34538bde..78ebd604 100644
--- a/src/iceberg/meson.build
+++ b/src/iceberg/meson.build
@@ -118,6 +118,7 @@ iceberg_sources = files(
     'util/timepoint.cc',
     'util/truncate_util.cc',
     'util/type_util.cc',
+    'util/url_encoder.cc',
     'util/uuid.cc',
 )
 
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index a32bbe4d..e93852aa 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -108,6 +108,7 @@ add_iceberg_test(util_test
                  location_util_test.cc
                  string_util_test.cc
                  truncate_util_test.cc
+                 url_encoder_test.cc
                  uuid_test.cc
                  visit_type_test.cc)
 
diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build
index 37818281..50422ccc 100644
--- a/src/iceberg/test/meson.build
+++ b/src/iceberg/test/meson.build
@@ -88,6 +88,7 @@ iceberg_tests = {
             'location_util_test.cc',
             'string_util_test.cc',
             'truncate_util_test.cc',
+            'url_encoder_test.cc',
             'uuid_test.cc',
             'visit_type_test.cc',
         ),
diff --git a/src/iceberg/test/url_encoder_test.cc b/src/iceberg/test/url_encoder_test.cc
new file mode 100644
index 00000000..fe22a9bb
--- /dev/null
+++ b/src/iceberg/test/url_encoder_test.cc
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/url_encoder.h"
+
+#include <gtest/gtest.h>
+
+#include "iceberg/test/matchers.h"
+
+namespace iceberg {
+
+// Checks Encode(): unreserved characters pass through untouched and everything
+// else is emitted as an uppercase "%XX" escape.
+TEST(UrlEncoderTest, Encode) {
+  // RFC 3986 unreserved characters should not be encoded
+  EXPECT_THAT(UrlEncoder::Encode("abc123XYZ"), ::testing::Eq("abc123XYZ"));
+  EXPECT_THAT(UrlEncoder::Encode("test-file_name.txt~backup"),
+              ::testing::Eq("test-file_name.txt~backup"));
+
+  // Spaces and special characters should be encoded
+  EXPECT_THAT(UrlEncoder::Encode("hello world"), ::testing::Eq("hello%20world"));
+  EXPECT_THAT(UrlEncoder::Encode("test@example.com"),
+              ::testing::Eq("test%40example.com"));
+  EXPECT_THAT(UrlEncoder::Encode("path/to/file"), ::testing::Eq("path%2Fto%2Ffile"));
+  EXPECT_THAT(UrlEncoder::Encode("key=value&foo=bar"),
+              ::testing::Eq("key%3Dvalue%26foo%3Dbar"));
+  EXPECT_THAT(UrlEncoder::Encode("100%"), ::testing::Eq("100%25"));
+
+  // Control characters (ASCII Unit Separator, 0x1F) are escaped as well
+  EXPECT_THAT(UrlEncoder::Encode("hello\x1fworld"), ::testing::Eq("hello%1Fworld"));
+
+  // Empty input yields empty output
+  EXPECT_THAT(UrlEncoder::Encode(""), ::testing::Eq(""));
+}
+
+// Checks Decode(): "%XX" escapes are accepted with either hex-digit case and
+// text without escapes is returned unchanged.
+TEST(UrlEncoderTest, Decode) {
+  // Decode percent-encoded strings
+  EXPECT_THAT(UrlEncoder::Decode("hello%20world"), ::testing::Eq("hello world"));
+  EXPECT_THAT(UrlEncoder::Decode("test%40example.com"),
+              ::testing::Eq("test@example.com"));
+  EXPECT_THAT(UrlEncoder::Decode("path%2fto%2Ffile"), ::testing::Eq("path/to/file"));
+  EXPECT_THAT(UrlEncoder::Decode("key%3dvalue%26foo%3Dbar"),
+              ::testing::Eq("key=value&foo=bar"));
+  EXPECT_THAT(UrlEncoder::Decode("100%25"), ::testing::Eq("100%"));
+
+  // ASCII Unit Separator (0x1F)
+  EXPECT_THAT(UrlEncoder::Decode("hello%1Fworld"), ::testing::Eq("hello\x1Fworld"));
+
+  // Unreserved characters remain unchanged
+  EXPECT_THAT(UrlEncoder::Decode("test-file_name.txt~backup"),
+              ::testing::Eq("test-file_name.txt~backup"));
+  EXPECT_THAT(UrlEncoder::Decode(""), ::testing::Eq(""));
+}
+
+// Checks that Decode(Encode(s)) reproduces s for a mix of inputs, including the
+// empty string and strings that already contain '%'.
+TEST(UrlEncoderTest, EncodeDecodeRoundTrip) {
+  std::vector<std::string> test_cases = {"hello world",
+                                         "test@example.com",
+                                         "path/to/file",
+                                         "key=value&foo=bar",
+                                         "100%",
+                                         "hello\x1Fworld",
+                                         "special!@#$%^&*()chars",
+                                         "mixed-123_test.file~ok",
+                                         ""};
+
+  for (const auto& test : test_cases) {
+    std::string encoded = UrlEncoder::Encode(test);
+    std::string decoded = UrlEncoder::Decode(encoded);
+    EXPECT_EQ(decoded, test) << "Round-trip failed for: " << test;
+  }
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/util/meson.build b/src/iceberg/util/meson.build
index 880f6340..b3866b70 100644
--- a/src/iceberg/util/meson.build
+++ b/src/iceberg/util/meson.build
@@ -38,6 +38,7 @@ install_headers(
         'timepoint.h',
         'truncate_util.h',
         'type_util.h',
+        'url_encoder.h',
         'uuid.h',
         'visitor_generate.h',
         'visit_type.h',
diff --git a/src/iceberg/util/url_encoder.cc b/src/iceberg/util/url_encoder.cc
new file mode 100644
index 00000000..a6ae4fd8
--- /dev/null
+++ b/src/iceberg/util/url_encoder.cc
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/url_encoder.h"
+
+#include <locale>
+
+namespace iceberg {
+
+namespace {
+
+// Returns true when `c` is one of the characters Encode() copies through
+// unescaped: an ASCII letter, an ASCII digit, or one of '-', '.', '_', '~'.
+bool IsUnreserved(unsigned char c) {
+  const bool is_digit = '0' <= c && c <= '9';
+  const bool is_lower = 'a' <= c && c <= 'z';
+  const bool is_upper = 'A' <= c && c <= 'Z';
+  if (is_digit || is_lower || is_upper) {
+    return true;
+  }
+
+  switch (c) {
+    case '-':
+    case '.':
+    case '_':
+    case '~':
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Maps a single hexadecimal digit (upper or lower case) to its value 0-15.
+// Any character that is not a hex digit yields -1.
+constexpr int8_t FromHex(char c) {
+  if ('0' <= c && c <= '9') {
+    return c - '0';
+  }
+  if ('a' <= c && c <= 'f') {
+    return 10 + (c - 'a');
+  }
+  if ('A' <= c && c <= 'F') {
+    return 10 + (c - 'A');
+  }
+  return -1;
+}
+
+}  // namespace
+
+// Percent-encodes `str_to_encode` one byte at a time: unreserved characters are
+// copied through and every other byte becomes "%XX" with uppercase hex digits.
+std::string UrlEncoder::Encode(std::string_view str_to_encode) {
+  static constexpr char kHexChars[] = "0123456789ABCDEF";
+  std::string result;
+  result.reserve(str_to_encode.size() * 3 / 2 /* Heuristic reservation */);
+
+  for (const char c : str_to_encode) {
+    // Work on the unsigned byte value. Where `char` is signed, bytes >= 0x80 are
+    // negative, so shifting the raw `char` would give a negative (out-of-bounds)
+    // index into kHexChars.
+    const auto byte = static_cast<unsigned char>(c);
+    if (IsUnreserved(byte)) {
+      result += c;
+    } else {
+      result += '%';
+      result += kHexChars[byte >> 4];
+      result += kHexChars[byte & 0xF];
+    }
+  }
+
+  return result;
+}
+
+// Reverses percent-encoding: each "%XX" whose two digits are valid hex becomes
+// the byte 0xXX. Everything else, including a malformed or truncated escape, is
+// copied through unchanged.
+std::string UrlEncoder::Decode(std::string_view str_to_decode) {
+  const size_t length = str_to_decode.size();
+  std::string decoded;
+  decoded.reserve(length);
+
+  size_t pos = 0;
+  while (pos < length) {
+    const char current = str_to_decode[pos];
+    // An escape needs two more characters after the '%'.
+    if (current == '%' && pos + 2 < length) {
+      const int8_t high = FromHex(str_to_decode[pos + 1]);
+      const int8_t low = FromHex(str_to_decode[pos + 2]);
+      if (high != -1 && low != -1) {
+        decoded += static_cast<char>((high << 4) | low);
+        pos += 3;  // consume '%' and both hex digits
+        continue;
+      }
+    }
+    // Not a valid %XX sequence, copy as-is
+    decoded += current;
+    ++pos;
+  }
+
+  return decoded;
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/util/url_encoder.h b/src/iceberg/util/url_encoder.h
new file mode 100644
index 00000000..c6b8dd70
--- /dev/null
+++ b/src/iceberg/util/url_encoder.h
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <string>
+#include <string_view>
+
+#include "iceberg/iceberg_export.h"
+
+/// \file iceberg/util/url_encoder.h
+/// \brief URL encoding and decoding.
+
+namespace iceberg {
+
+/// \brief Static helpers for percent-encoding and percent-decoding strings.
+class ICEBERG_EXPORT UrlEncoder {
+ public:
+  /// \brief Percent-encode a string.
+  ///
+  /// \details A simple encoder that works one byte at a time:
+  /// - Unreserved characters [A-Z], [a-z], [0-9], "-", "_", ".", "~" are kept.
+  /// - Space is encoded as "%20" (unlike Java's URLEncoder which uses "+").
+  /// - Every other byte, including "/", is written as "%XX" (uppercase hex).
+  /// \param str_to_encode The string to encode.
+  /// \return The percent-encoded string.
+  static std::string Encode(std::string_view str_to_encode);
+
+  /// \brief Decode a percent-encoded string.
+  ///
+  /// \details Each "%XX" (hex, either case) becomes one byte; other text is kept as-is.
+  /// \param str_to_decode The encoded string to decode.
+  /// \return The decoded string.
+  static std::string Decode(std::string_view str_to_decode);
+};
+
+}  // namespace iceberg

Reply via email to